Mirror of https://github.com/twitter/the-algorithm.git (synced 2024-12-25 11:41:51 +01:00)

[docx] split commit for file 4600

Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>

Commit 470dc00686 (parent f37e76300b)
Binary file not shown.
@@ -1,205 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Collections;
import java.util.Map;
import java.util.Optional;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.util.IdTimeRanges;
import com.twitter.util.Future;

/**
 * A Finagle filter used to filter requests to tiers.
 * Parses the serialized query on the Earlybird request, and extracts the since / until /
 * since_id / max_id operators. This filter then tests whether the request overlaps with the
 * given tier. If there is no overlap, an empty response is returned without actually forwarding
 * the request to the underlying service.
 */
public class EarlybirdTimeRangeFilter extends
    SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {

  private static final Logger LOG = LoggerFactory.getLogger(EarlybirdTimeRangeFilter.class);

  private static final EarlybirdResponse ERROR_RESPONSE =
      new EarlybirdResponse(EarlybirdResponseCode.PERSISTENT_ERROR, 0)
          .setSearchResults(new ThriftSearchResults());

  private final ServingRangeProvider servingRangeProvider;
  private final Optional<EarlybirdTimeFilterQueryRewriter> queryRewriter;

  private static final Map<EarlybirdRequestType, SearchCounter> FAILED_REQUESTS;
  static {
    final Map<EarlybirdRequestType, SearchCounter> tempMap =
        Maps.newEnumMap(EarlybirdRequestType.class);
    for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
      tempMap.put(requestType, SearchCounter.export(
          "time_range_filter_" + requestType.getNormalizedName() + "_failed_requests"));
    }
    FAILED_REQUESTS = Collections.unmodifiableMap(tempMap);
  }

  public static EarlybirdTimeRangeFilter newTimeRangeFilterWithQueryRewriter(
      ServingRangeProvider servingRangeProvider,
      SearchDecider decider) {

    return new EarlybirdTimeRangeFilter(servingRangeProvider,
        Optional.of(new EarlybirdTimeFilterQueryRewriter(servingRangeProvider, decider)));
  }

  public static EarlybirdTimeRangeFilter newTimeRangeFilterWithoutQueryRewriter(
      ServingRangeProvider servingRangeProvider) {

    return new EarlybirdTimeRangeFilter(servingRangeProvider, Optional.empty());
  }

  /**
   * Constructs a filter that avoids forwarding requests to unrelated tiers
   * based on the requests' since / until / since_id / max_id operators.
   * @param provider Holds the boundary information.
   */
  EarlybirdTimeRangeFilter(
      ServingRangeProvider provider,
      Optional<EarlybirdTimeFilterQueryRewriter> rewriter) {

    this.servingRangeProvider = provider;
    this.queryRewriter = rewriter;
  }

  public ServingRangeProvider getServingRangeProvider() {
    return servingRangeProvider;
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {

    Query parsedQuery = requestContext.getParsedQuery();
    if (parsedQuery != null) {
      // Only perform filtering if the serialized query is set.
      try {
        IdTimeRanges queryRanges = IdTimeRanges.fromQuery(parsedQuery);
        if (queryRanges == null) {
          // No time ranges in query.
          return issueServiceRequest(service, requestContext);
        }

        ServingRange servingRange =
            servingRangeProvider.getServingRange(
                requestContext, requestContext.useOverrideTierConfig());

        if (queryDoesNotOverlapWithServingRange(queryRanges, servingRange)) {
          return Future.value(tierSkippedResponse(requestContext.getEarlybirdRequestType(),
              servingRange));
        } else {
          return issueServiceRequest(service, requestContext);
        }
      } catch (QueryParserException e) {
        LOG.warn("Unable to get IdTimeRanges from query: " + parsedQuery.serialize());
        // The failure here is not due to a malformed query from the client, since we already
        // were able to successfully get a parsed Query from the request.
        // If we can't determine the time ranges, pass the query along to the tier, and just
        // restrict it to the time ranges of the tier.
        return issueServiceRequest(service, requestContext);
      }
    } else {
      // There's no serialized query. Just pass through like an identity filter.
      return issueServiceRequest(service, requestContext);
    }
  }

  private boolean queryDoesNotOverlapWithServingRange(IdTimeRanges queryRanges,
                                                      ServingRange servingRange) {
    // As long as a query overlaps with the tier serving range on either side,
    // the request is not filtered. I.e. we want to be conservative when doing this filtering,
    // because it is just an optimization. We ignore the inclusiveness / exclusiveness of the
    // boundaries. If the tier boundary and the query boundary happen to be the same, we do not
    // filter the request.
    return queryRanges.getSinceIDExclusive().or(0L)
        > servingRange.getServingRangeMaxId()
        || queryRanges.getMaxIDInclusive().or(Long.MAX_VALUE)
        < servingRange.getServingRangeSinceId()
        || queryRanges.getSinceTimeInclusive().or(0)
        > servingRange.getServingRangeUntilTimeSecondsFromEpoch()
        || queryRanges.getUntilTimeExclusive().or(Integer.MAX_VALUE)
        < servingRange.getServingRangeSinceTimeSecondsFromEpoch();
  }

  private Future<EarlybirdResponse> issueServiceRequest(
      Service<EarlybirdRequestContext, EarlybirdResponse> service,
      EarlybirdRequestContext requestContext) {

    try {
      EarlybirdRequestContext request = requestContext;
      if (queryRewriter.isPresent()) {
        request = queryRewriter.get().rewriteRequest(requestContext);
      }
      return service.apply(request);
    } catch (QueryParserException e) {
      FAILED_REQUESTS.get(requestContext.getEarlybirdRequestType()).increment();
      String msg = "Failed to add time filter operators";
      LOG.error(msg, e);

      // Note that in this case it is not clear whether the error is the client's fault or our
      // fault, so we don't necessarily return a CLIENT_ERROR here.
      // Currently this actually returns a PERSISTENT_ERROR.
      if (requestContext.getRequest().getDebugMode() > 0) {
        return Future.value(
            ERROR_RESPONSE.deepCopy().setDebugString(msg + ": " + e.getMessage()));
      } else {
        return Future.value(ERROR_RESPONSE);
      }
    }
  }

  /**
   * Creates a tier skipped response, based on the given request type.
   *
   * For recency, relevance, facets and top tweets requests, this method returns a SUCCESS response
   * with no search results and the minSearchedStatusID and maxSearchedStatusID appropriately set.
   * For term stats requests, it returns a TIER_SKIPPED response, but we need to revisit this.
   *
   * @param requestType The type of the request.
   * @param servingRange The serving range of the tier that we're skipping.
   */
  @VisibleForTesting
  public static EarlybirdResponse tierSkippedResponse(
      EarlybirdRequestType requestType,
      ServingRange servingRange) {
    String debugMessage =
        "Tier skipped because it does not intersect with query time boundaries.";
    if (requestType == EarlybirdRequestType.TERM_STATS) {
      // If it's a term stats request, return a TIER_SKIPPED response for now.
      // But we need to figure out the right thing to do here.
      return new EarlybirdResponse(EarlybirdResponseCode.TIER_SKIPPED, 0)
          .setDebugString(debugMessage);
    } else {
      // minIds in ServingRange instances are set to tierLowerBoundary - 1, because the
      // since_id operator is exclusive. The max_id operator on the other hand is inclusive,
      // so maxIds in ServingRange instances are also set to tierUpperBoundary - 1.
      // Here we want both of them to be inclusive, so we need to increment the minId by 1.
      return EarlybirdResponseUtil.tierSkippedRootResponse(
          servingRange.getServingRangeSinceId() + 1,
          servingRange.getServingRangeMaxId(),
          debugMessage);
    }
  }
}
Binary file not shown.
@@ -1,167 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.List;

import javax.inject.Inject;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryNodeUtils;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.query.search.SearchOperator;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DropAllProtectedOperatorVisitor;
import com.twitter.search.queryparser.visitors.QueryTreeIndex;
import com.twitter.util.Future;

/**
 * Full archive service filter validates requests with a protected operator, appends the
 * '[exclude protected]' operator by default, and appends '[filter protected]' operator instead if
 * 'getProtectedTweetsOnly' request param is set. A client error response is returned if any of the
 * following rules is violated.
 * 1. There is at most one 'protected' operator in the query.
 * 2. If there is a 'protected' operator, it must be in the query root node.
 * 3. The parent node of the 'protected' operator must not be negated and must be a conjunction.
 * 4. If there is a positive 'protected' operator, 'followedUserIds' and 'searcherId' request
 * params must be set.
 */
public class FullArchiveProtectedOperatorFilter extends
    SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private static final Logger LOG =
      LoggerFactory.getLogger(FullArchiveProtectedOperatorFilter.class);
  private static final SearchOperator EXCLUDE_PROTECTED_OPERATOR =
      new SearchOperator(SearchOperator.Type.EXCLUDE, SearchOperatorConstants.PROTECTED);
  private static final SearchOperator FILTER_PROTECTED_OPERATOR =
      new SearchOperator(SearchOperator.Type.FILTER, SearchOperatorConstants.PROTECTED);
  private static final SearchCounter QUERY_PARSER_FAILURE_COUNT =
      SearchCounter.export("protected_operator_filter_query_parser_failure_count");

  private final DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor;
  private final SearchDecider decider;

  @Inject
  public FullArchiveProtectedOperatorFilter(
      DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor,
      SearchDecider decider) {
    this.dropProtectedOperatorVisitor = dropProtectedOperatorVisitor;
    this.decider = decider;
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    Query query = requestContext.getParsedQuery();
    if (query == null) {
      return service.apply(requestContext);
    }

    QueryTreeIndex queryTreeIndex = QueryTreeIndex.buildFor(query);
    List<Query> nodeList = queryTreeIndex.getNodeList();
    // try to find a protected operator, returns error response if more than one protected
    // operator is detected
    SearchOperator protectedOperator = null;
    for (Query node : nodeList) {
      if (node instanceof SearchOperator) {
        SearchOperator searchOp = (SearchOperator) node;
        if (SearchOperatorConstants.PROTECTED.equals(searchOp.getOperand())) {
          if (protectedOperator == null) {
            protectedOperator = searchOp;
          } else {
            return createErrorResponse("Only one 'protected' operator is expected.");
          }
        }
      }
    }

    Query processedQuery;
    if (protectedOperator == null) {
      // no protected operator is detected, append '[exclude protected]' by default
      processedQuery = QueryNodeUtils.appendAsConjunction(query, EXCLUDE_PROTECTED_OPERATOR);
    } else {
      // protected operator must be in the query root node
      if (queryTreeIndex.getParentOf(protectedOperator) != query) {
        return createErrorResponse("'protected' operator must be in the query root node");
      }
      // the query node that contains protected operator must not be negated
      if (query.mustNotOccur()) {
        return createErrorResponse("The query node that contains a 'protected' operator must not"
            + " be negated.");
      }
      // the query node that contains protected operator must be a conjunction
      if (!query.isTypeOf(Query.QueryType.CONJUNCTION)) {
        return createErrorResponse("The query node that contains a 'protected' operator must"
            + " be a conjunction.");
      }
      // check the existence of 'followedUserIds' and 'searcherId' if it is a positive operator
      if (isPositive(protectedOperator)) {
        if (!validateRequestParam(requestContext.getRequest())) {
          return createErrorResponse("'followedUserIds' and 'searcherId' are required "
              + "by positive 'protected' operator.");
        }
      }
      processedQuery = query;
    }
    // update processedQuery if 'getProtectedTweetsOnly' is set to true, it takes precedence over
    // the existing protected operators
    if (requestContext.getRequest().isGetProtectedTweetsOnly()) {
      if (!validateRequestParam(requestContext.getRequest())) {
        return createErrorResponse("'followedUserIds' and 'searcherId' are required "
            + "when 'getProtectedTweetsOnly' is set to true.");
      }
      try {
        processedQuery = processedQuery.accept(dropProtectedOperatorVisitor);
      } catch (QueryParserException e) {
        // this should not happen since we already have a parsed query
        QUERY_PARSER_FAILURE_COUNT.increment();
        LOG.warn(
            "Failed to drop protected operator for serialized query: " + query.serialize(), e);
      }
      processedQuery =
          QueryNodeUtils.appendAsConjunction(processedQuery, FILTER_PROTECTED_OPERATOR);
    }

    if (processedQuery == query) {
      return service.apply(requestContext);
    } else {
      EarlybirdRequestContext clonedRequestContext =
          EarlybirdRequestContext.copyRequestContext(requestContext, processedQuery);
      return service.apply(clonedRequestContext);
    }
  }

  private boolean validateRequestParam(EarlybirdRequest request) {
    List<Long> followedUserIds = request.followedUserIds;
    Long searcherId = (request.searchQuery != null && request.searchQuery.isSetSearcherId())
        ? request.searchQuery.getSearcherId() : null;
    return followedUserIds != null && !followedUserIds.isEmpty() && searcherId != null;
  }

  private boolean isPositive(SearchOperator searchOp) {
    boolean isNegateExclude = searchOp.mustNotOccur()
        && searchOp.getOperatorType() == SearchOperator.Type.EXCLUDE;
    boolean isPositive = !searchOp.mustNotOccur()
        && (searchOp.getOperatorType() == SearchOperator.Type.INCLUDE
        || searchOp.getOperatorType() == SearchOperator.Type.FILTER);
    return isNegateExclude || isPositive;
  }

  private Future<EarlybirdResponse> createErrorResponse(String errorMsg) {
    EarlybirdResponse response = new EarlybirdResponse(EarlybirdResponseCode.CLIENT_ERROR, 0);
    response.setDebugInfo(new EarlybirdDebugInfo().setHost("full_archive_root"));
    response.setDebugString(errorMsg);
    return Future.value(response);
  }

}
Binary file not shown.
@@ -1,64 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Date;
import java.util.concurrent.TimeUnit;

import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.util.date.DateUtil;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;

public class FullArchiveServingRangeProvider implements ServingRangeProvider {

  public static final Date FULL_ARCHIVE_START_DATE = DateUtil.toDate(2006, 3, 21);
  private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 48;

  private final SearchDecider decider;
  private final String deciderKey;

  public FullArchiveServingRangeProvider(
      SearchDecider decider, String deciderKey) {
    this.decider = decider;
    this.deciderKey = deciderKey;
  }

  @Override
  public ServingRange getServingRange(
      final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
    return new ServingRange() {
      @Override
      public long getServingRangeSinceId() {
        // we use 1 instead of 0, because the since_id operator is inclusive in earlybirds.
        return 1L;
      }

      @Override
      public long getServingRangeMaxId() {
        long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
            (decider.featureExists(deciderKey))
                ? decider.getAvailability(deciderKey)
                : DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);

        long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
        return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
      }

      @Override
      public long getServingRangeSinceTimeSecondsFromEpoch() {
        return FULL_ARCHIVE_START_DATE.getTime() / 1000;
      }

      @Override
      public long getServingRangeUntilTimeSecondsFromEpoch() {
        long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
            (decider.featureExists(deciderKey))
                ? decider.getAvailability(deciderKey)
                : DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);

        long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
        return boundaryTime / 1000;
      }
    };
  }
}
Binary file not shown.
@@ -1,66 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import javax.inject.Inject;

import com.google.common.annotations.VisibleForTesting;

import com.twitter.common.util.Clock;
import com.twitter.finagle.Filter;
import com.twitter.finagle.Service;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.common.EarlybirdRequestUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
import com.twitter.search.earlybird_root.common.TwitterContextProvider;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.util.Future;

/**
 * Creates a new RequestContext from an EarlybirdRequest, and passes the RequestContext down to
 * the rest of the filter/service chain.
 */
public class InitializeRequestContextFilter extends
    Filter<EarlybirdRequest, EarlybirdResponse, EarlybirdRequestContext, EarlybirdResponse> {

  @VisibleForTesting
  static final SearchCounter FAILED_QUERY_PARSING =
      SearchCounter.export("initialize_request_context_filter_query_parsing_failure");

  private final SearchDecider decider;
  private final TwitterContextProvider twitterContextProvider;
  private final Clock clock;

  /**
   * The constructor of the filter.
   */
  @Inject
  public InitializeRequestContextFilter(SearchDecider decider,
                                        TwitterContextProvider twitterContextProvider,
                                        Clock clock) {
    this.decider = decider;
    this.twitterContextProvider = twitterContextProvider;
    this.clock = clock;
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequest request,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {

    EarlybirdRequestUtil.recordClientClockDiff(request);

    EarlybirdRequestContext requestContext;
    try {
      requestContext = EarlybirdRequestContext.newContext(
          request, decider, twitterContextProvider.get(), clock);
    } catch (QueryParserException e) {
      FAILED_QUERY_PARSING.increment();
      return QueryParsingUtils.newClientErrorResponse(request, e);
    }

    return service.apply(requestContext);
  }
}
Binary file not shown.
@@ -1,80 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.EnumMap;
import java.util.List;
import java.util.Map;

import com.google.common.annotations.VisibleForTesting;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResultExtraMetadata;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;

/**
 * Filter tracks the isUserProtected metadata stats returned from Earlybirds.
 */
public class IsUserProtectedMetadataTrackingFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private static final String COUNTER_PREFIX = "is_user_protected_metadata_count_filter_";
  @VisibleForTesting
  final Map<EarlybirdRequestType, SearchCounter> totalCounterByRequestTypeMap;
  @VisibleForTesting
  final Map<EarlybirdRequestType, SearchCounter> isProtectedCounterByRequestTypeMap;

  public IsUserProtectedMetadataTrackingFilter() {
    this.totalCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
    this.isProtectedCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
    for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
      this.totalCounterByRequestTypeMap.put(requestType,
          SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_total"));
      this.isProtectedCounterByRequestTypeMap.put(requestType,
          SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_is_protected"));
    }
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext request,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    Future<EarlybirdResponse> response = service.apply(request);

    EarlybirdRequestType requestType = request.getEarlybirdRequestType();
    response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
      @Override
      public void onSuccess(EarlybirdResponse response) {
        if (!response.isSetSearchResults() || response.getSearchResults().getResults().isEmpty()) {
          return;
        }
        List<ThriftSearchResult> searchResults = response.getSearchResults().getResults();
        int totalCount = searchResults.size();
        int isUserProtectedCount = 0;
        for (ThriftSearchResult searchResult : searchResults) {
          if (searchResult.isSetMetadata() && searchResult.getMetadata().isSetExtraMetadata()) {
            ThriftSearchResultExtraMetadata extraMetadata =
                searchResult.getMetadata().getExtraMetadata();
            if (extraMetadata.isIsUserProtected()) {
              isUserProtectedCount++;
            }
          }
        }
        IsUserProtectedMetadataTrackingFilter.this
            .totalCounterByRequestTypeMap.get(requestType).add(totalCount);
        IsUserProtectedMetadataTrackingFilter.this
            .isProtectedCounterByRequestTypeMap.get(requestType).add(isUserProtectedCount);
      }

      @Override
      public void onFailure(Throwable cause) { }
    });

    return response;
  }

}
Binary file not shown.
@@ -1,49 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Function;
import com.twitter.util.Future;

public class MarkTweetSourceFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private final SearchCounter searchResultsNotSet;

  private final ThriftTweetSource tweetSource;

  public MarkTweetSourceFilter(ThriftTweetSource tweetSource) {
    this.tweetSource = tweetSource;
    searchResultsNotSet = SearchCounter.export(
        tweetSource.name().toLowerCase() + "_mark_tweet_source_filter_search_results_not_set");
  }

  @Override
  public Future<EarlybirdResponse> apply(
      final EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    return service.apply(requestContext).map(new Function<EarlybirdResponse, EarlybirdResponse>() {
      @Override
      public EarlybirdResponse apply(EarlybirdResponse response) {
        if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS
            && requestContext.getEarlybirdRequestType() != EarlybirdRequestType.TERM_STATS) {
          if (!response.isSetSearchResults()) {
            searchResultsNotSet.increment();
          } else {
            for (ThriftSearchResult searchResult : response.getSearchResults().getResults()) {
              searchResult.setTweetSource(tweetSource);
            }
          }
        }
        return response;
      }
    }
    );
  }
}
Binary file not shown.
@@ -1,119 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.List;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchMovingAverage;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;

/**
 * Filter that is tracking the engagement stats returned from Earlybirds.
 */
public class MetadataTrackingFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {

  private static final String SCORING_SIGNAL_STAT_PREFIX = "scoring_signal_";
  private static final String SCORE_STAT_PATTERN = "client_id_score_tracker_for_%s_x100";

  @VisibleForTesting
  static final SearchMovingAverage SCORING_SIGNAL_FAV_COUNT =
      SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "fav_count");

  @VisibleForTesting
  static final SearchMovingAverage SCORING_SIGNAL_REPLY_COUNT =
      SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "reply_count");

  @VisibleForTesting
  static final SearchMovingAverage SCORING_SIGNAL_RETWEET_COUNT =
      SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "retweet_count");

  @VisibleForTesting
  static final LoadingCache<String, SearchMovingAverage> CLIENT_SCORE_METRICS_LOADING_CACHE =
      CacheBuilder.newBuilder().build(new CacheLoader<String, SearchMovingAverage>() {
        public SearchMovingAverage load(String clientId) {
          return SearchMovingAverage.export(String.format(SCORE_STAT_PATTERN, clientId));
        }
      });

  @Override
  public Future<EarlybirdResponse> apply(final EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {

    Future<EarlybirdResponse> response = service.apply(request);

    response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
      @Override
      public void onSuccess(EarlybirdResponse earlybirdResponse) {
        EarlybirdRequestType type = EarlybirdRequestType.of(request);

        if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
            && type == EarlybirdRequestType.RELEVANCE
            && earlybirdResponse.isSetSearchResults()
            && earlybirdResponse.getSearchResults().isSetResults()) {

          List<ThriftSearchResult> searchResults = earlybirdResponse.getSearchResults()
              .getResults();

          long totalFavoriteAmount = 0;
          long totalReplyAmount = 0;
          long totalRetweetAmount = 0;
          double totalScoreX100 = 0;

          for (ThriftSearchResult result : searchResults) {
            if (!result.isSetMetadata()) {
              continue;
            }

            ThriftSearchResultMetadata metadata = result.getMetadata();

            if (metadata.isSetFavCount()) {
              totalFavoriteAmount += metadata.getFavCount();
            }

            if (metadata.isSetReplyCount()) {
              totalReplyAmount += metadata.getReplyCount();
            }

            if (metadata.isSetRetweetCount()) {
              totalRetweetAmount += metadata.getRetweetCount();
            }

            if (metadata.isSetScore()) {
              // Scale up the score by 100 so that scores are at least 1 and visible on viz graph
              totalScoreX100 += metadata.getScore() * 100;
            }
          }

          // We only count present engagement counts but report the full size of the search results.
          // This means that we consider the missing counts as being 0.
          SCORING_SIGNAL_FAV_COUNT.addSamples(totalFavoriteAmount, searchResults.size());
          SCORING_SIGNAL_REPLY_COUNT.addSamples(totalReplyAmount, searchResults.size());
          SCORING_SIGNAL_RETWEET_COUNT.addSamples(totalRetweetAmount, searchResults.size());
          // Export per client id average scores.
          String requestClientId = ClientIdUtil.getClientIdFromRequest(request);
          String quotaClientId = ClientIdUtil.getQuotaClientId(requestClientId);
          CLIENT_SCORE_METRICS_LOADING_CACHE.getUnchecked(quotaClientId)
              .addSamples((long) totalScoreX100, searchResults.size());
        }
      }

      @Override
      public void onFailure(Throwable cause) { }
    });

    return response;
  }
}
Binary file not shown.
@@ -1,45 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.Percentile;
import com.twitter.search.common.metrics.PercentileUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;

public class NamedMultiTermDisjunctionStatsFilter extends
    SimpleFilter<EarlybirdRequest, EarlybirdResponse> {

  private static final String STAT_FORMAT = "named_disjunction_size_client_%s_key_%s";
  // ClientID -> disjunction name -> operand count
  private static final ConcurrentMap<String, ConcurrentMap<String, Percentile<Integer>>>
      NAMED_MULTI_TERM_DISJUNCTION_IDS_COUNT = new ConcurrentHashMap<>();

  @Override
  public Future<EarlybirdResponse> apply(EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {

    if (request.getSearchQuery().isSetNamedDisjunctionMap()) {
      for (Map.Entry<String, List<Long>> entry
          : request.getSearchQuery().getNamedDisjunctionMap().entrySet()) {

        Map<String, Percentile<Integer>> statsForClient =
            NAMED_MULTI_TERM_DISJUNCTION_IDS_COUNT.computeIfAbsent(
                request.getClientId(), clientId -> new ConcurrentHashMap<>());
        Percentile<Integer> stats = statsForClient.computeIfAbsent(entry.getKey(),
            keyName -> PercentileUtil.createPercentile(
                String.format(STAT_FORMAT, request.getClientId(), keyName)));

        stats.record(entry.getValue().size());
      }
    }

    return service.apply(request);
  }
}
Binary file not shown.
@@ -1,81 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.HashSet;
import java.util.Set;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DetectPositiveOperatorVisitor;

/**
 * Filter that is tracking the unexpected nullcast results from Earlybirds.
 */
public class NullcastTrackingFilter extends SensitiveResultsTrackingFilter {
  public NullcastTrackingFilter() {
    super("unexpected nullcast tweets", true);
  }

  private static final Logger LOG = LoggerFactory.getLogger(NullcastTrackingFilter.class);

  @VisibleForTesting
  static final SearchCounter BAD_NULLCAST_QUERY_COUNT =
      SearchCounter.export("unexpected_nullcast_query_count");

  @VisibleForTesting
  static final SearchCounter BAD_NULLCAST_RESULT_COUNT =
      SearchCounter.export("unexpected_nullcast_result_count");

  @Override
  protected Logger getLogger() {
    return LOG;
  }

  @Override
  protected SearchCounter getSensitiveQueryCounter() {
    return BAD_NULLCAST_QUERY_COUNT;
  }

  @Override
  protected SearchCounter getSensitiveResultsCounter() {
    return BAD_NULLCAST_RESULT_COUNT;
  }

  @Override
  protected Set<Long> getSensitiveResults(EarlybirdRequestContext requestContext,
      EarlybirdResponse earlybirdResponse) throws Exception {
    if (!requestContext.getParsedQuery().accept(
        new DetectPositiveOperatorVisitor(SearchOperatorConstants.NULLCAST))) {
      return EarlybirdResponseUtil.findUnexpectedNullcastStatusIds(
          earlybirdResponse.getSearchResults(), requestContext.getRequest());
    } else {
      return new HashSet<>();
    }
  }

  /**
   * Some Earlybird requests are not searches; instead, they are scoring requests.
   * These requests supply a list of IDs to be scored.
   * It is OK to return a nullcast tweet result if the ID is supplied in the request.
   * This extracts the scoring request tweet IDs.
   */
  @Override
  protected Set<Long> getExceptedResults(EarlybirdRequestContext requestContext) {
    EarlybirdRequest request = requestContext.getRequest();
    if (request == null
        || !request.isSetSearchQuery()
        || request.getSearchQuery().getSearchStatusIdsSize() == 0) {
      return ImmutableSet.of();
    }
    return request.getSearchQuery().getSearchStatusIds();
  }
}
Binary file not shown.
@@ -1,10 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import javax.inject.Inject;

public class PostCacheRequestTypeCountFilter extends RequestTypeCountFilter {
  @Inject
  public PostCacheRequestTypeCountFilter() {
    super("post_cache");
  }
}
Binary file not shown.
@@ -1,10 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import javax.inject.Inject;

public class PreCacheRequestTypeCountFilter extends RequestTypeCountFilter {
  @Inject
  public PreCacheRequestTypeCountFilter() {
    super("pre_cache");
  }
}
Binary file not shown.
@@ -1,114 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import javax.inject.Singleton;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import com.twitter.common.text.language.LocaleUtil;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.lang.ThriftLanguageUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

/**
 * Export stats for query languages.
 */
@Singleton
public class QueryLangStatFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {

  public static class Config {
    // We put a limit here in case a buggy client is sending us random lang codes.
    private int maxNumberOfLangs;

    public Config(int maxNumberOfLangs) {
      this.maxNumberOfLangs = maxNumberOfLangs;
    }

    public int getMaxNumberOfLangs() {
      return maxNumberOfLangs;
    }
  }

  @VisibleForTesting
  protected static final String LANG_STATS_PREFIX = "num_queries_in_lang_";

  private final Config config;
  private final SearchCounter allCountsForLangsOverMaxNumLang =
      SearchCounter.export(LANG_STATS_PREFIX + "overflow");

  private final ConcurrentHashMap<String, SearchCounter> langCounters =
      new ConcurrentHashMap<>();

  @Inject
  public QueryLangStatFilter(Config config) {
    this.config = config;
  }

  private SearchCounter getCounter(String lang) {
    Preconditions.checkNotNull(lang);

    SearchCounter counter = langCounters.get(lang);
    if (counter == null) {
      if (langCounters.size() >= config.getMaxNumberOfLangs()) {
        return allCountsForLangsOverMaxNumLang;
      }
      synchronized (langCounters) { // This double-checked locking is safe,
                                    // since we're using a ConcurrentHashMap
        counter = langCounters.get(lang);
        if (counter == null) {
          counter = SearchCounter.export(LANG_STATS_PREFIX + lang);
          langCounters.put(lang, counter);
        }
      }
    }

    return counter;
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {

    String lang = null;

    ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();

    lang = searchQuery.getQueryLang();

    if (lang == null) {
      // fallback to ui lang
      lang = searchQuery.getUiLang();
    }

    if (lang == null && searchQuery.isSetUserLangs()) {
      // fallback to the user lang with the highest confidence
      double maxConfidence = Double.MIN_VALUE;

      for (Map.Entry<ThriftLanguage, Double> entry : searchQuery.getUserLangs().entrySet()) {
        if (entry.getValue() > maxConfidence) {
          lang = ThriftLanguageUtil.getLanguageCodeOf(entry.getKey());
          maxConfidence = entry.getValue();
        }
      }
    }

    if (lang == null) {
      lang = LocaleUtil.UNDETERMINED_LANGUAGE;
    }

    getCounter(lang).increment();

    return service.apply(requestContext);
  }
}
Binary file not shown.
@@ -1,194 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.EnumSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import scala.runtime.BoxedUnit;

import com.google.common.collect.ImmutableMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimer;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.query.annotation.Annotation;
import com.twitter.search.queryparser.query.search.SearchOperator;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DetectAnnotationVisitor;
import com.twitter.search.queryparser.visitors.DetectVisitor;
import com.twitter.util.Future;

/**
 * For a given query, increments counters if that query has a number of search operators or
 * annotations applied to it. Used to detect unusual traffic patterns.
 */
public class QueryOperatorStatFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private static final Logger LOG = LoggerFactory.getLogger(QueryOperatorStatFilter.class);

  private final SearchCounter numQueryOperatorDetectionErrors =
      SearchCounter.export("query_operator_detection_errors");

  private final SearchCounter numQueryOperatorConsideredRequests =
      SearchCounter.export("query_operator_requests_considered");

  private final ImmutableMap<String, SearchTimerStats> filterOperatorStats;

  // Keeps track of the number of queries with a filter applied, whose type we don't care about.
  private final SearchCounter numUnknownFilterOperatorRequests =
      SearchCounter.export("query_operator_filter_unknown_requests");

  private final ImmutableMap<String, SearchTimerStats> includeOperatorStats;

  // Keeps track of the number of queries with an include operator applied, whose type we don't
  // know about.
  private final SearchCounter numUnknownIncludeOperatorRequests =
      SearchCounter.export("query_operator_include_unknown_requests");

  private final ImmutableMap<SearchOperator.Type, SearchTimerStats> operatorTypeStats;

  private final SearchCounter numVariantRequests =
      SearchCounter.export("query_operator_variant_requests");

  /**
   * Construct this QueryOperatorStatFilter by getting the complete set of possible filters a query
   * might have and associating each with a counter.
   */
  public QueryOperatorStatFilter() {

    ImmutableMap.Builder<String, SearchTimerStats> filterBuilder = new ImmutableMap.Builder<>();
    for (String operand : SearchOperatorConstants.VALID_FILTER_OPERANDS) {
      filterBuilder.put(
          operand,
          SearchTimerStats.export(
              "query_operator_filter_" + operand + "_requests",
              TimeUnit.MILLISECONDS,
              false,
              true));
    }
    filterOperatorStats = filterBuilder.build();

    ImmutableMap.Builder<String, SearchTimerStats> includeBuilder = new ImmutableMap.Builder<>();
    for (String operand : SearchOperatorConstants.VALID_INCLUDE_OPERANDS) {
      includeBuilder.put(
          operand,
          SearchTimerStats.export(
              "query_operator_include_" + operand + "_requests",
              TimeUnit.MILLISECONDS,
              false,
              true));
    }
    includeOperatorStats = includeBuilder.build();

    ImmutableMap.Builder<SearchOperator.Type, SearchTimerStats> operatorBuilder =
        new ImmutableMap.Builder<>();
    for (SearchOperator.Type operatorType : SearchOperator.Type.values()) {
      operatorBuilder.put(
          operatorType,
          SearchTimerStats.export(
              "query_operator_" + operatorType.name().toLowerCase() + "_requests",
              TimeUnit.MILLISECONDS,
              false,
              true
          ));
    }
    operatorTypeStats = operatorBuilder.build();
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    numQueryOperatorConsideredRequests.increment();
    Query parsedQuery = requestContext.getParsedQuery();

    if (parsedQuery == null) {
      return service.apply(requestContext);
    }

    SearchTimer timer = new SearchTimer();
    timer.start();

    return service.apply(requestContext).ensure(() -> {
      timer.stop();

      try {
        updateTimersForOperatorsAndOperands(parsedQuery, timer);
        updateCountersIfVariantAnnotation(parsedQuery);
      } catch (QueryParserException e) {
        LOG.warn("Unable to test if query has operators defined", e);
        numQueryOperatorDetectionErrors.increment();
      }
      return BoxedUnit.UNIT;
    });
  }

  /**
   * Tracks request stats for operators and operands.
   *
   * @param parsedQuery the query to check.
   */
  private void updateTimersForOperatorsAndOperands(Query parsedQuery, SearchTimer timer)
      throws QueryParserException {
    final DetectVisitor detectVisitor = new DetectVisitor(false, SearchOperator.Type.values());
    parsedQuery.accept(detectVisitor);

    Set<SearchOperator.Type> detectedOperatorTypes = EnumSet.noneOf(SearchOperator.Type.class);
    for (Query query : detectVisitor.getDetectedQueries()) {
      // This detectVisitor only matches on SearchOperators.
      SearchOperator operator = (SearchOperator) query;
      SearchOperator.Type operatorType = operator.getOperatorType();
      detectedOperatorTypes.add(operatorType);

      if (operatorType == SearchOperator.Type.INCLUDE) {
        updateOperandStats(
            operator,
            includeOperatorStats,
            timer,
            numUnknownIncludeOperatorRequests);
      }
      if (operatorType == SearchOperator.Type.FILTER) {
        updateOperandStats(
            operator,
            filterOperatorStats,
            timer,
            numUnknownFilterOperatorRequests);
      }
    }

    for (SearchOperator.Type type : detectedOperatorTypes) {
      operatorTypeStats.get(type).stoppedTimerIncrement(timer);
    }
  }

  private void updateOperandStats(
      SearchOperator operator,
      ImmutableMap<String, SearchTimerStats> operandRequestStats,
      SearchTimer timer,
      SearchCounter unknownOperandStat) {
    String operand = operator.getOperand();
    SearchTimerStats stats = operandRequestStats.get(operand);

    if (stats != null) {
      stats.stoppedTimerIncrement(timer);
    } else {
      unknownOperandStat.increment();
    }
  }

  private void updateCountersIfVariantAnnotation(Query parsedQuery) throws QueryParserException {
    DetectAnnotationVisitor visitor = new DetectAnnotationVisitor(Annotation.Type.VARIANT);
    if (parsedQuery.accept(visitor)) {
      numVariantRequests.increment();
    }
  }
}
Binary file not shown.
@ -1,92 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.concurrent.TimeUnit;
import javax.inject.Inject;

import com.twitter.common_internal.text.version.PenguinVersion;
import com.twitter.common_internal.text.version.PenguinVersionConfig;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.finagle.tracing.Trace;
import com.twitter.finagle.tracing.Tracing;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.common.metrics.SearchTimer;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
import com.twitter.search.queryparser.parser.SerializedQueryParser;
import com.twitter.search.queryparser.parser.SerializedQueryParser.TokenizationOption;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.util.Duration;
import com.twitter.util.Future;

public class QueryTokenizerFilter extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private static final String PREFIX = "query_tokenizer_";
  private static final SearchRateCounter SUCCESS_COUNTER =
      SearchRateCounter.export(PREFIX + "success");
  private static final SearchRateCounter FAILURE_COUNTER =
      SearchRateCounter.export(PREFIX + "error");
  private static final SearchRateCounter SKIPPED_COUNTER =
      SearchRateCounter.export(PREFIX + "skipped");
  private static final SearchTimerStats QUERY_TOKENIZER_TIME =
      SearchTimerStats.export(PREFIX + "time", TimeUnit.MILLISECONDS, false);

  private final TokenizationOption tokenizationOption;

  @Inject
  public QueryTokenizerFilter(PenguinVersionConfig penguinversions) {
    PenguinVersion[] supportedVersions = penguinversions
        .getSupportedVersions().toArray(new PenguinVersion[0]);
    tokenizationOption = new TokenizationOption(true, supportedVersions);
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {

    if (!requestContext.getRequest().isRetokenizeSerializedQuery()
        || !requestContext.getRequest().isSetSearchQuery()
        || !requestContext.getRequest().getSearchQuery().isSetSerializedQuery()) {
      SKIPPED_COUNTER.increment();
      return service.apply(requestContext);
    }

    SearchTimer timer = QUERY_TOKENIZER_TIME.startNewTimer();
    try {
      String serializedQuery = requestContext.getRequest().getSearchQuery().getSerializedQuery();
      Query parsedQuery = reparseQuery(serializedQuery);
      SUCCESS_COUNTER.increment();
      return service.apply(EarlybirdRequestContext.copyRequestContext(requestContext, parsedQuery));
    } catch (QueryParserException e) {
      FAILURE_COUNTER.increment();
      return QueryParsingUtils.newClientErrorResponse(requestContext.getRequest(), e);
    } finally {
      long elapsed = timer.stop();
      QUERY_TOKENIZER_TIME.timerIncrement(elapsed);
      Tracing trace = Trace.apply();
      if (trace.isActivelyTracing()) {
        trace.record(PREFIX + "time", Duration.fromMilliseconds(elapsed));
      }
    }
  }

  public Query reparseQuery(String serializedQuery) throws QueryParserException {
    SerializedQueryParser parser = new SerializedQueryParser(tokenizationOption);
    return parser.parse(serializedQuery);
  }

  /**
   * Initializing the query parser can take many seconds. We initialize it at warmup so that
   * requests don't time out after we join the serverset. SEARCH-28801
   */
  public void performExpensiveInitialization() throws QueryParserException {
    SerializedQueryParser queryParser = new SerializedQueryParser(tokenizationOption);

    // The Korean query parser takes a few seconds on its own to initialize.
    String koreanQuery = "스포츠";
    queryParser.parse(koreanQuery);
  }
}
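
performExpensiveInitialization() above exists so that the slow parser setup happens during warmup rather than on the first live request. A minimal sketch of the same pattern, with a hypothetical ExpensiveParser interface standing in for the real parser:

// Warmup sketch: exercise an expensive component once at startup so the first
// live request does not pay the initialization cost. ExpensiveParser and its
// parse() method are assumptions for illustration only.
final class WarmupSketch {
  interface ExpensiveParser {
    void parse(String query) throws Exception;
  }

  static void warmUp(ExpensiveParser parser) {
    try {
      parser.parse("스포츠"); // a query that touches the slow-to-initialize code path
    } catch (Exception e) {
      // Warmup failures are reported and ignored; serving can still proceed.
      System.err.println("warmup failed: " + e);
    }
  }
}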
Binary file not shown.
@ -1,60 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.concurrent.TimeUnit;

import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;

public class RealtimeServingRangeProvider implements ServingRangeProvider {

  private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 240;

  private final SearchDecider decider;
  private final String deciderKey;

  public RealtimeServingRangeProvider(SearchDecider decider, String deciderKey) {
    this.decider = decider;
    this.deciderKey = deciderKey;
  }

  @Override
  public ServingRange getServingRange(
      final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
    return new ServingRange() {
      @Override
      public long getServingRangeSinceId() {
        long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
            (decider.featureExists(deciderKey))
                ? decider.getAvailability(deciderKey)
                : DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);

        long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
        return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
      }

      @Override
      public long getServingRangeMaxId() {
        return SnowflakeIdParser.generateValidStatusId(
            requestContext.getCreatedTimeMillis(), 0);
      }

      @Override
      public long getServingRangeSinceTimeSecondsFromEpoch() {
        long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
            (decider.featureExists(deciderKey))
                ? decider.getAvailability(deciderKey)
                : DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);

        long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
        return boundaryTime / 1000;
      }

      @Override
      public long getServingRangeUntilTimeSecondsFromEpoch() {
        return requestContext.getCreatedTimeMillis() / 1000;
      }
    };
  }
}
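
getServingRangeSinceId() above turns a wall-clock boundary (the request creation time minus a decider-controlled number of hours) into a tweet ID via SnowflakeIdParser. A rough standalone sketch of that conversion, assuming the public Snowflake layout (millisecond timestamp since the Twitter epoch shifted left by 22 bits); the epoch constant and shift below are assumptions for illustration, not values taken from SnowflakeIdParser:

import java.util.concurrent.TimeUnit;

final class ServingRangeSketch {
  // Assumed public Snowflake layout: millisecond timestamp since the Twitter epoch,
  // shifted left past 22 bits of worker and sequence data. Both constants are
  // assumptions for this sketch.
  private static final long TWEPOCH_MS = 1288834974657L;
  private static final int TIMESTAMP_SHIFT = 22;

  static long sinceIdForBoundary(long requestCreatedMs, long boundaryHours) {
    long boundaryMs = requestCreatedMs - TimeUnit.HOURS.toMillis(boundaryHours);
    // Smallest status id whose embedded timestamp is at least boundaryMs.
    return (boundaryMs - TWEPOCH_MS) << TIMESTAMP_SHIFT;
  }

  public static void main(String[] args) {
    // 240 hours matches DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO above.
    System.out.println(sinceIdForBoundary(System.currentTimeMillis(), 240));
  }
}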
Binary file not shown.
@ -1,94 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
import javax.inject.Inject;

import com.google.common.annotations.VisibleForTesting;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.constants.thriftjava.ThriftQuerySource;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.util.Future;

/**
 * Rejects requests based on the query source of the request. Intended to be used at super-root
 * or archive-root. If used to reject client requests at super-root, the client will get a response
 * with empty results and a REQUEST_BLOCKED_ERROR status code. If used at archive-root the client
 * will get a response which might contain some results from realtime and protected, and the status
 * code of the response will depend on how super-root combines responses from the three downstream
 * roots.
 */
public class RejectRequestsByQuerySourceFilter extends
    SimpleFilter<EarlybirdRequest, EarlybirdResponse> {

  @VisibleForTesting
  protected static final String NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN =
      "num_root_%s_rejected_requests_with_query_source_%s";
  @VisibleForTesting
  protected static final String REJECT_REQUESTS_DECIDER_KEY_PATTERN =
      "root_%s_reject_requests_with_query_source_%s";
  private final Map<ThriftQuerySource, SearchRateCounter> rejectedRequestsCounterPerQuerySource =
      new HashMap<>();
  private final Map<ThriftQuerySource, String> rejectRequestsDeciderKeyPerQuerySource =
      new HashMap<>();
  private final SearchDecider searchDecider;

  @Inject
  public RejectRequestsByQuerySourceFilter(
      @Nullable EarlybirdCluster cluster,
      SearchDecider searchDecider) {

    this.searchDecider = searchDecider;

    String clusterName = cluster != null
        ? cluster.getNameForStats()
        : EarlybirdCluster.SUPERROOT.getNameForStats();

    for (ThriftQuerySource querySource : ThriftQuerySource.values()) {
      String querySourceName = querySource.name().toLowerCase();

      rejectedRequestsCounterPerQuerySource.put(querySource,
          SearchRateCounter.export(
              String.format(
                  NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN, clusterName, querySourceName)));

      rejectRequestsDeciderKeyPerQuerySource.put(querySource,
          String.format(
              REJECT_REQUESTS_DECIDER_KEY_PATTERN, clusterName, querySourceName));
    }
  }

  @Override
  public Future<EarlybirdResponse> apply(EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {

    ThriftQuerySource querySource = request.isSetQuerySource()
        ? request.getQuerySource()
        : ThriftQuerySource.UNKNOWN;

    String deciderKey = rejectRequestsDeciderKeyPerQuerySource.get(querySource);
    if (searchDecider.isAvailable(deciderKey)) {
      rejectedRequestsCounterPerQuerySource.get(querySource).increment();
      return Future.value(getRejectedRequestResponse(querySource, deciderKey));
    }
    return service.apply(request);
  }

  private static EarlybirdResponse getRejectedRequestResponse(
      ThriftQuerySource querySource, String deciderKey) {
    return new EarlybirdResponse(EarlybirdResponseCode.REQUEST_BLOCKED_ERROR, 0)
        .setSearchResults(new ThriftSearchResults())
        .setDebugString(String.format(
            "Request with query source %s is blocked by decider %s", querySource, deciderKey));
  }
}
Binary file not shown.
@ -1,33 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.concurrent.TimeUnit;

import com.twitter.finagle.Filter;
import com.twitter.finagle.Service;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

/**
 * A filter for transforming a RequestContext to an EarlybirdRequest.
 */
public class RequestContextToEarlybirdRequestFilter extends
    Filter<EarlybirdRequestContext, EarlybirdResponse, EarlybirdRequest, EarlybirdResponse> {

  private static final SearchTimerStats REQUEST_CONTEXT_TRIP_TIME =
      SearchTimerStats.export("request_context_trip_time", TimeUnit.MILLISECONDS, false,
          true);

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequest, EarlybirdResponse> service) {

    long tripTime = System.currentTimeMillis() - requestContext.getCreatedTimeMillis();
    REQUEST_CONTEXT_TRIP_TIME.timerIncrement(tripTime);

    return service.apply(requestContext.getRequest());
  }
}
Binary file not shown.
@ -1,185 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;

import scala.runtime.BoxedUnit;

import com.twitter.common.util.Clock;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.Percentile;
import com.twitter.search.common.metrics.PercentileUtil;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.query.thriftjava.CollectorParams;
import com.twitter.search.common.query.thriftjava.CollectorTerminationParams;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.snowflake.id.SnowflakeId;
import com.twitter.util.Function;
import com.twitter.util.Future;

public class RequestResultStatsFilter
    extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
  private final Clock clock;
  private final RequestResultStats stats;

  static class RequestResultStats {
    private static final String PREFIX = "request_result_properties_";

    private final SearchCounter resultsRequestedCount;
    private final SearchCounter resultsReturnedCount;
    private final SearchCounter maxHitsToProcessCount;
    private final SearchCounter hitsProcessedCount;
    private final SearchCounter docsProcessedCount;
    private final SearchCounter timeoutMsCount;
    private Map<String, Percentile<Integer>> requestedNumResultsPercentileByClientId;
    private Map<String, Percentile<Integer>> returnedNumResultsPercentileByClientId;
    private Map<String, Percentile<Long>> oldestResultPercentileByClientId;

    RequestResultStats() {
      // Request properties
      resultsRequestedCount = SearchCounter.export(PREFIX + "results_requested_cnt");
      maxHitsToProcessCount = SearchCounter.export(PREFIX + "max_hits_to_process_cnt");
      timeoutMsCount = SearchCounter.export(PREFIX + "timeout_ms_cnt");
      requestedNumResultsPercentileByClientId = new ConcurrentHashMap<>();

      // Result properties
      resultsReturnedCount = SearchCounter.export(PREFIX + "results_returned_cnt");
      hitsProcessedCount = SearchCounter.export(PREFIX + "hits_processed_cnt");
      docsProcessedCount = SearchCounter.export(PREFIX + "docs_processed_cnt");
      returnedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
      oldestResultPercentileByClientId = new ConcurrentHashMap<>();
    }

    SearchCounter getResultsRequestedCount() {
      return resultsRequestedCount;
    }

    SearchCounter getResultsReturnedCount() {
      return resultsReturnedCount;
    }

    SearchCounter getMaxHitsToProcessCount() {
      return maxHitsToProcessCount;
    }

    SearchCounter getHitsProcessedCount() {
      return hitsProcessedCount;
    }

    SearchCounter getDocsProcessedCount() {
      return docsProcessedCount;
    }

    SearchCounter getTimeoutMsCount() {
      return timeoutMsCount;
    }

    Percentile<Long> getOldestResultPercentile(String clientId) {
      return oldestResultPercentileByClientId.computeIfAbsent(clientId,
          key -> PercentileUtil.createPercentile(statName(clientId, "oldest_result_age_seconds")));
    }

    Percentile<Integer> getRequestedNumResultsPercentile(String clientId) {
      return requestedNumResultsPercentileByClientId.computeIfAbsent(clientId,
          key -> PercentileUtil.createPercentile(statName(clientId, "requested_num_results")));
    }

    Percentile<Integer> getReturnedNumResultsPercentile(String clientId) {
      return returnedNumResultsPercentileByClientId.computeIfAbsent(clientId,
          key -> PercentileUtil.createPercentile(statName(clientId, "returned_num_results")));
    }

    private String statName(String clientId, String suffix) {
      return String.format("%s%s_%s", PREFIX, ClientIdUtil.formatClientId(clientId), suffix);
    }
  }

  @Inject
  RequestResultStatsFilter(Clock clock, RequestResultStats stats) {
    this.clock = clock;
    this.stats = stats;
  }

  private void updateRequestStats(EarlybirdRequest request) {
    ThriftSearchQuery searchQuery = request.getSearchQuery();
    CollectorParams collectorParams = searchQuery.getCollectorParams();

    if (collectorParams != null) {
      stats.getResultsRequestedCount().add(collectorParams.numResultsToReturn);
      if (request.isSetClientId()) {
        stats.getRequestedNumResultsPercentile(request.getClientId())
            .record(collectorParams.numResultsToReturn);
      }
      CollectorTerminationParams terminationParams = collectorParams.getTerminationParams();
      if (terminationParams != null) {
        if (terminationParams.isSetMaxHitsToProcess()) {
          stats.getMaxHitsToProcessCount().add(terminationParams.maxHitsToProcess);
        }
        if (terminationParams.isSetTimeoutMs()) {
          stats.getTimeoutMsCount().add(terminationParams.timeoutMs);
        }
      }
    } else {
      if (searchQuery.isSetNumResults()) {
        stats.getResultsRequestedCount().add(searchQuery.numResults);
        if (request.isSetClientId()) {
          stats.getRequestedNumResultsPercentile(request.getClientId())
              .record(searchQuery.numResults);
        }
      }
      if (searchQuery.isSetMaxHitsToProcess()) {
        stats.getMaxHitsToProcessCount().add(searchQuery.maxHitsToProcess);
      }
      if (request.isSetTimeoutMs()) {
        stats.getTimeoutMsCount().add(request.timeoutMs);
      }
    }
  }

  private void updateResultsStats(String clientId, ThriftSearchResults results) {
    stats.getResultsReturnedCount().add(results.getResultsSize());
    if (results.isSetNumHitsProcessed()) {
      stats.getHitsProcessedCount().add(results.numHitsProcessed);
    }

    if (clientId != null) {
      if (results.getResultsSize() > 0) {
        List<ThriftSearchResult> resultsList = results.getResults();

        long lastId = resultsList.get(resultsList.size() - 1).getId();
        long tweetTime = SnowflakeId.timeFromId(lastId).inLongSeconds();
        long tweetAge = (clock.nowMillis() / 1000) - tweetTime;
        stats.getOldestResultPercentile(clientId).record(tweetAge);
      }

      stats.getReturnedNumResultsPercentile(clientId).record(results.getResultsSize());
    }
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {

    updateRequestStats(request);

    return service.apply(request).onSuccess(
        new Function<EarlybirdResponse, BoxedUnit>() {
          @Override
          public BoxedUnit apply(EarlybirdResponse response) {
            if (response.isSetSearchResults()) {
              updateResultsStats(request.getClientId(), response.searchResults);
            }
            return BoxedUnit.UNIT;
          }
        });
  }
}
Binary file not shown.
@ -1,79 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.concurrent.TimeUnit;
import javax.inject.Inject;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.root.RequestSuccessStats;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;

import static com.twitter.search.common.util.earlybird.EarlybirdResponseUtil.responseConsideredFailed;

/**
 * Records cancellations, timeouts, and failures for requests that do not go through
 * ScatterGatherService (which also updates these stats, but for different requests).
 */
public class RequestSuccessStatsFilter
    extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {

  private final RequestSuccessStats stats;

  @Inject
  RequestSuccessStatsFilter(RequestSuccessStats stats) {
    this.stats = stats;
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {

    final long startTime = System.nanoTime();

    return service.apply(request).addEventListener(
        new FutureEventListener<EarlybirdResponse>() {
          @Override
          public void onSuccess(EarlybirdResponse response) {
            boolean success = true;

            if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
              success = false;
              stats.getCancelledRequestCount().increment();
            } else if (response.getResponseCode() == EarlybirdResponseCode.SERVER_TIMEOUT_ERROR) {
              success = false;
              stats.getTimedoutRequestCount().increment();
            } else if (responseConsideredFailed(response.getResponseCode())) {
              success = false;
              stats.getErroredRequestCount().increment();
            }

            long latencyNanos = System.nanoTime() - startTime;
            stats.getRequestLatencyStats().requestComplete(
                TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, success);
          }

          @Override
          public void onFailure(Throwable cause) {
            long latencyNanos = System.nanoTime() - startTime;
            stats.getRequestLatencyStats().requestComplete(
                TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, false);

            if (FinagleUtil.isCancelException(cause)) {
              stats.getCancelledRequestCount().increment();
            } else if (FinagleUtil.isTimeoutException(cause)) {
              stats.getTimedoutRequestCount().increment();
            } else {
              stats.getErroredRequestCount().increment();
            }
          }
        });
  }
}
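
RequestSuccessStatsFilter measures latency from just before the downstream call until either callback fires, and classifies the outcome before recording it. The same bookkeeping pattern expressed with the JDK's CompletableFuture, as a minimal sketch in which counter updates are replaced by a print:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;

final class LatencyAccountingSketch {
  // Wraps a future so that, on completion, latency and success/failure are recorded.
  static <T> CompletableFuture<T> withLatencyStats(CompletableFuture<T> future) {
    final long startTimeNanos = System.nanoTime();
    return future.whenComplete((response, cause) -> {
      long latencyMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNanos);
      boolean success = (cause == null);
      // A real implementation would bump cancelled/timed-out/errored counters here,
      // mirroring the filter above; this sketch only prints the outcome.
      System.out.println("completed in " + latencyMs + " ms, success=" + success);
    });
  }
}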
Binary file not shown.
@ -1,105 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableMap;

import com.twitter.common.util.Clock;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.clientstats.RequestCounters;
import com.twitter.search.common.clientstats.RequestCountersEventListener;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;

public class RequestTypeCountFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private final ImmutableMap<EarlybirdRequestType, RequestCounters> typeCounters;
  private final RequestCounters allRequestTypesCounter;
  private final ImmutableMap<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
      perTypePerClientCounters;

  /**
   * Constructs the filter.
   */
  public RequestTypeCountFilter(final String statSuffix) {
    ImmutableMap.Builder<EarlybirdRequestType, RequestCounters> perTypeBuilder =
        ImmutableMap.builder();
    for (EarlybirdRequestType type : EarlybirdRequestType.values()) {
      perTypeBuilder.put(type, new RequestCounters(
          "request_type_count_filter_" + type.getNormalizedName() + "_" + statSuffix));
    }
    typeCounters = perTypeBuilder.build();

    allRequestTypesCounter =
        new RequestCounters("request_type_count_filter_all_" + statSuffix, true);

    ImmutableMap.Builder<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
        perTypePerClientBuilder = ImmutableMap.builder();

    // No point in setting any kind of expiration policy for the cache, since the stats will
    // continue to be exported, so the objects will not be GCed anyway.
    CacheBuilder<Object, Object> cacheBuilder = CacheBuilder.newBuilder();
    for (final EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
      CacheLoader<String, RequestCounters> cacheLoader =
          new CacheLoader<String, RequestCounters>() {
            @Override
            public RequestCounters load(String clientId) {
              return new RequestCounters("request_type_count_filter_for_" + clientId + "_"
                  + requestType.getNormalizedName() + "_" + statSuffix);
            }
          };
      perTypePerClientBuilder.put(requestType, cacheBuilder.build(cacheLoader));
    }
    perTypePerClientCounters = perTypePerClientBuilder.build();
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType();
    RequestCounters requestCounters = typeCounters.get(requestType);
    Preconditions.checkNotNull(requestCounters);

    // Update the per-type and "all" counters.
    RequestCountersEventListener<EarlybirdResponse> requestCountersEventListener =
        new RequestCountersEventListener<>(
            requestCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
    RequestCountersEventListener<EarlybirdResponse> allRequestTypesEventListener =
        new RequestCountersEventListener<>(
            allRequestTypesCounter, Clock.SYSTEM_CLOCK,
            EarlybirdSuccessfulResponseHandler.INSTANCE);

    RequestCountersEventListener<EarlybirdResponse> perTypePerClientEventListener =
        updatePerTypePerClientCountersListener(requestContext);

    return service.apply(requestContext)
        .addEventListener(requestCountersEventListener)
        .addEventListener(allRequestTypesEventListener)
        .addEventListener(perTypePerClientEventListener);
  }

  private RequestCountersEventListener<EarlybirdResponse> updatePerTypePerClientCountersListener(
      EarlybirdRequestContext earlybirdRequestContext) {
    EarlybirdRequestType requestType = earlybirdRequestContext.getEarlybirdRequestType();
    LoadingCache<String, RequestCounters> perClientCounters =
        perTypePerClientCounters.get(requestType);
    Preconditions.checkNotNull(perClientCounters);

    String clientId = ClientIdUtil.formatFinagleClientIdAndClientId(
        FinagleUtil.getFinagleClientName(),
        ClientIdUtil.getClientIdFromRequest(earlybirdRequestContext.getRequest()));
    RequestCounters clientCounters = perClientCounters.getUnchecked(clientId);
    Preconditions.checkNotNull(clientCounters);

    return new RequestCountersEventListener<>(
        clientCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
  }
}
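
The per-type, per-client counters above rely on a Guava LoadingCache so that a counters object is created lazily the first time a client ID is seen and reused afterwards. A small self-contained sketch of that idiom, with a plain AtomicLong standing in for RequestCounters:

import java.util.concurrent.atomic.AtomicLong;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

final class PerClientCounterSketch {
  // One counter per client id, created lazily on first access and never evicted,
  // which matches the comment above about exported stats keeping entries alive anyway.
  private final LoadingCache<String, AtomicLong> perClientCounts =
      CacheBuilder.newBuilder().build(new CacheLoader<String, AtomicLong>() {
        @Override
        public AtomicLong load(String clientId) {
          return new AtomicLong();
        }
      });

  long incrementFor(String clientId) {
    return perClientCounts.getUnchecked(clientId).incrementAndGet();
  }
}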
Binary file not shown.
@ -1,50 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Map;

import com.google.common.collect.Maps;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;

public class ResponseCodeStatFilter
    extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {

  private final Map<EarlybirdResponseCode, SearchCounter> responseCodeCounters;

  /**
   * Create ResponseCodeStatFilter
   */
  public ResponseCodeStatFilter() {
    responseCodeCounters = Maps.newEnumMap(EarlybirdResponseCode.class);
    for (EarlybirdResponseCode code : EarlybirdResponseCode.values()) {
      SearchCounter stat = SearchCounter.export("response_code_" + code.name().toLowerCase());
      responseCodeCounters.put(code, stat);
    }
  }

  @Override
  public Future<EarlybirdResponse> apply(
      final EarlybirdRequest request,
      final Service<EarlybirdRequest, EarlybirdResponse> service) {

    return service.apply(request).addEventListener(
        new FutureEventListener<EarlybirdResponse>() {

          @Override
          public void onSuccess(final EarlybirdResponse response) {
            responseCodeCounters.get(response.getResponseCode()).increment();
          }

          @Override
          public void onFailure(final Throwable cause) { }
        });
  }
}
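
ResponseCodeStatFilter pre-populates one counter per EarlybirdResponseCode value, so recording a response never needs a null check. A minimal sketch of the same EnumMap idiom, using an illustrative enum and AtomicLong counters rather than the real types:

import java.util.EnumMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

final class ResponseCodeCountersSketch {
  // Illustrative subset of response codes; the real filter iterates the full enum.
  enum Code { SUCCESS, CLIENT_ERROR, SERVER_TIMEOUT_ERROR }

  private final Map<Code, AtomicLong> counters = new EnumMap<>(Code.class);

  ResponseCodeCountersSketch() {
    for (Code code : Code.values()) {
      counters.put(code, new AtomicLong());
    }
  }

  void record(Code code) {
    // Every code was pre-populated in the constructor, so no null check is needed.
    counters.get(code).incrementAndGet();
  }
}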
Binary file not shown.
@ -1,114 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableMap;

import javax.inject.Inject;
import javax.inject.Singleton;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSortedMap;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchCustomGauge;
import com.twitter.search.earlybird.config.TierInfo;
import com.twitter.search.earlybird.config.TierInfoSource;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.snowflake.id.SnowflakeId;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;

/**
 * A filter to count the tier to which the oldest tweet in the results belongs.
 */
@Singleton
public class ResultTierCountFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {

  private static final String COUNTER_PREFIX = "result_tier_count";
  private final long firstTweetTimeSinceEpochSec;
  private final NavigableMap<Long, SearchCounter> tierBuckets;
  private final SearchCounter allCounter = SearchCounter.export(COUNTER_PREFIX + "_all");
  private final SearchCounter noResultsCounter =
      SearchCounter.export(COUNTER_PREFIX + "_no_results");

  @Inject
  @SuppressWarnings("unused")
  ResultTierCountFilter(TierInfoSource tierInfoSource) {
    List<TierInfo> tierInfos = tierInfoSource.getTierInformation();
    tierInfos.sort(Comparator.comparing(TierInfo::getDataStartDate));

    firstTweetTimeSinceEpochSec = tierInfos.get(0).getServingRangeSinceTimeSecondsFromEpoch();

    ImmutableSortedMap.Builder<Long, SearchCounter> builder = ImmutableSortedMap.naturalOrder();
    Collections.reverse(tierInfos);

    for (TierInfo tierInfo : tierInfos) {
      SearchCounter searchCounter = SearchCounter.export(
          String.format("%s_%s", COUNTER_PREFIX, tierInfo.getTierName()));
      builder.put(tierInfo.getServingRangeSinceTimeSecondsFromEpoch(), searchCounter);

      // export cumulative metrics to sum from the latest to a lower tier
      Collection<SearchCounter> counters = builder.build().values();
      SearchCustomGauge.export(
          String.format("%s_down_to_%s", COUNTER_PREFIX, tierInfo.getTierName()),
          () -> counters.stream()
              .mapToLong(SearchCounter::get)
              .sum());
    }

    tierBuckets = builder.build();
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext context,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    return service.apply(context).addEventListener(
        new FutureEventListener<EarlybirdResponse>() {
          @Override
          public void onFailure(Throwable cause) {
            // do nothing
          }

          @Override
          public void onSuccess(EarlybirdResponse response) {
            record(response);
          }
        });
  }

  @VisibleForTesting
  void record(EarlybirdResponse response) {
    if (response.isSetSearchResults()) {
      long minResultsStatusId = response.getSearchResults().getResults().stream()
          .mapToLong(ThriftSearchResult::getId)
          .min()
          .orElse(-1);
      getBucket(minResultsStatusId).increment();
    }
    allCounter.increment();
  }

  private SearchCounter getBucket(long statusId) {
    if (statusId < 0) {
      return noResultsCounter;
    }

    // If non-negative statusId is not a SnowflakeId, the tweet must have been created before
    // Twepoch (2010-11-04T01:42:54Z) and thus belongs to full1.
    long timeSinceEpochSec = firstTweetTimeSinceEpochSec;
    if (SnowflakeId.isSnowflakeId(statusId)) {
      timeSinceEpochSec = SnowflakeId.timeFromId(statusId).inSeconds();
    }

    return tierBuckets.floorEntry(timeSinceEpochSec).getValue();
  }
}
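
getBucket() above maps a result's timestamp to a tier by taking the floor entry of a NavigableMap keyed on each tier's serving-range start. A standalone sketch of that lookup; the tier names and boundary timestamps are made up for illustration:

import java.util.NavigableMap;
import java.util.TreeMap;

final class TierBucketSketch {
  public static void main(String[] args) {
    // Keys are the earliest time (seconds since epoch) each tier serves;
    // these names and boundaries are illustrative only.
    NavigableMap<Long, String> tierBuckets = new TreeMap<>();
    tierBuckets.put(0L, "full_archive");
    tierBuckets.put(1_500_000_000L, "recent");
    tierBuckets.put(1_600_000_000L, "realtime");

    // floorEntry picks the newest tier whose start time is <= the result's timestamp.
    long resultTimeSec = 1_550_000_000L;
    System.out.println(tierBuckets.floorEntry(resultTimeSec).getValue()); // prints "recent"
  }
}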
Binary file not shown.
@ -1,59 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.finagle.Service;
import com.twitter.search.common.root.ScatterGatherService;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ExperimentCluster;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

public class ScatterGatherWithExperimentRedirectsService
    extends Service<EarlybirdRequestContext, EarlybirdResponse> {
  private final Service<EarlybirdRequestContext, EarlybirdResponse>
      controlScatterGatherService;

  private final Map<ExperimentCluster,
      ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
      experimentScatterGatherServices;

  private static final Logger LOG =
      LoggerFactory.getLogger(ScatterGatherWithExperimentRedirectsService.class);

  public ScatterGatherWithExperimentRedirectsService(
      Service<EarlybirdRequestContext, EarlybirdResponse> controlScatterGatherService,
      Map<ExperimentCluster,
          ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
          experimentScatterGatherServices
  ) {
    this.controlScatterGatherService = controlScatterGatherService;
    this.experimentScatterGatherServices = experimentScatterGatherServices;
  }

  @Override
  public Future<EarlybirdResponse> apply(EarlybirdRequestContext request) {
    if (request.getRequest().isSetExperimentClusterToUse()) {
      ExperimentCluster cluster = request.getRequest().getExperimentClusterToUse();

      if (!experimentScatterGatherServices.containsKey(cluster)) {
        String error = String.format(
            "Received invalid experiment cluster: %s", cluster.name());

        LOG.error("{} Request: {}", error, request.getRequest());

        return Future.value(new EarlybirdResponse()
            .setResponseCode(EarlybirdResponseCode.CLIENT_ERROR)
            .setDebugString(error));
      }

      return experimentScatterGatherServices.get(cluster).apply(request);
    }

    return controlScatterGatherService.apply(request);
  }
}
Binary file not shown.
@ -1,43 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.concurrent.atomic.AtomicReference;

import scala.Option;

import com.google.common.base.Preconditions;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.finagle.context.Contexts;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.root.SearchPayloadSizeFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;

/**
 * A filter that sets the clientId in the local context, to be used later by SearchPayloadSizeFilter.
 */
public class SearchPayloadSizeLocalContextFilter
    extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
  private static final SearchCounter CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER = SearchCounter.export(
      "search_payload_size_local_context_filter_client_id_context_key_not_set");

  @Override
  public Future<EarlybirdResponse> apply(EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {
    // In production, the SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY should always be set
    // (by ThriftServer). However, it's not set in tests, because tests do not start a ThriftServer.
    Option<AtomicReference<String>> clientIdOption =
        Contexts.local().get(SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY);
    if (clientIdOption.isDefined()) {
      AtomicReference<String> clientIdReference = clientIdOption.get();
      Preconditions.checkArgument(clientIdReference.get() == null);
      clientIdReference.set(request.getClientId());
    } else {
      CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER.increment();
    }

    return service.apply(request);
  }
}
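
SearchPayloadSizeLocalContextFilter fills in a slot that an earlier layer is expected to have installed in the local context, and counts the cases where the slot is missing. The handoff itself reduces to an AtomicReference that is set at most once, as in this sketch (the client ID value is hypothetical):

import java.util.concurrent.atomic.AtomicReference;

final class ContextHandoffSketch {
  public static void main(String[] args) {
    // An earlier layer installs an empty slot; a later filter fills it in exactly once.
    AtomicReference<String> clientIdSlot = new AtomicReference<>();
    if (clientIdSlot.get() == null) {
      clientIdSlot.set("client_a"); // hypothetical client id
    }
    System.out.println(clientIdSlot.get());
  }
}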
Binary file not shown.
@ -1,140 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.Set;

import com.google.common.base.Joiner;

import org.apache.thrift.TException;
import org.slf4j.Logger;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.thrift.ThriftUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;

/**
 * The general framework for earlybird root to track sensitive results.
 */
public abstract class SensitiveResultsTrackingFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {

  /**
   * The type name is used to distinguish different kinds of sensitive results in the logs.
   */
  private final String typeName;

  /**
   * Controls whether expensive information is logged.
   */
  private final boolean logDetails;

  /**
   * Constructor helps distinguish different sensitive content trackers.
   * @param typeName The sensitive content's name (e.g. nullcast)
   * @param logDetails Whether to log details such as serialized requests and responses
   */
  public SensitiveResultsTrackingFilter(final String typeName, boolean logDetails) {
    super();
    this.typeName = typeName;
    this.logDetails = logDetails;
  }

  /**
   * Get the LOG that the sensitive results can write to.
   */
  protected abstract Logger getLogger();

  /**
   * The counter which counts the number of queries with sensitive results.
   */
  protected abstract SearchCounter getSensitiveQueryCounter();

  /**
   * The counter which counts the number of sensitive results.
   */
  protected abstract SearchCounter getSensitiveResultsCounter();

  /**
   * The method defines how the sensitive results are identified.
   */
  protected abstract Set<Long> getSensitiveResults(
      EarlybirdRequestContext requestContext,
      EarlybirdResponse earlybirdResponse) throws Exception;

  /**
   * Get a set of tweets which should be excluded from the sensitive results set.
   */
  protected abstract Set<Long> getExceptedResults(EarlybirdRequestContext requestContext);

  @Override
  public final Future<EarlybirdResponse> apply(
      final EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    Future<EarlybirdResponse> response = service.apply(requestContext);

    response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
      @Override
      public void onSuccess(EarlybirdResponse earlybirdResponse) {
        try {
          if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
              && earlybirdResponse.isSetSearchResults()
              && requestContext.getParsedQuery() != null) {
            Set<Long> statusIds = getSensitiveResults(requestContext, earlybirdResponse);
            Set<Long> exceptedIds = getExceptedResults(requestContext);
            statusIds.removeAll(exceptedIds);

            if (statusIds.size() > 0) {
              getSensitiveQueryCounter().increment();
              getSensitiveResultsCounter().add(statusIds.size());
              logContent(requestContext, earlybirdResponse, statusIds);
            }
          }
        } catch (Exception e) {
          getLogger().error("Caught exception while trying to log sensitive results for query: {}",
              requestContext.getParsedQuery().serialize(), e);
        }
      }

      @Override
      public void onFailure(Throwable cause) {
      }
    });

    return response;
  }

  private void logContent(
      final EarlybirdRequestContext requestContext,
      final EarlybirdResponse earlybirdResponse,
      final Set<Long> statusIds) {

    if (logDetails) {
      String base64Request;
      try {
        base64Request = ThriftUtils.toBase64EncodedString(requestContext.getRequest());
      } catch (TException e) {
        base64Request = "Failed to parse base 64 request";
      }
      getLogger().error("Found " + typeName
          + ": {} | "
          + "parsedQuery: {} | "
          + "request: {} | "
          + "base 64 request: {} | "
          + "response: {}",
          Joiner.on(",").join(statusIds),
          requestContext.getParsedQuery().serialize(),
          requestContext.getRequest(),
          base64Request,
          earlybirdResponse);
    } else {
      getLogger().error("Found " + typeName + ": {} for parsedQuery {}",
          Joiner.on(",").join(statusIds),
          requestContext.getParsedQuery().serialize());
    }
  }
}
Binary file not shown.
@ -1,27 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

/** A per-service filter for handling exceptions. */
public class ServiceExceptionHandlingFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
  private final EarlybirdResponseExceptionHandler exceptionHandler;

  /** Creates a new ServiceExceptionHandlingFilter instance. */
  public ServiceExceptionHandlingFilter(EarlybirdCluster cluster) {
    this.exceptionHandler = new EarlybirdResponseExceptionHandler(cluster.getNameForStats());
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    return exceptionHandler.handleException(
        requestContext.getRequest(), service.apply(requestContext));
  }
}
Binary file not shown.
@ -1,81 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import java.util.HashMap;
import java.util.Map;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.earlybird_root.validators.FacetsResponseValidator;
import com.twitter.search.earlybird_root.validators.PassThroughResponseValidator;
import com.twitter.search.earlybird_root.validators.ServiceResponseValidator;
import com.twitter.search.earlybird_root.validators.TermStatsResultsValidator;
import com.twitter.search.earlybird_root.validators.TopTweetsResultsValidator;
import com.twitter.util.Function;
import com.twitter.util.Future;

/**
 * Filter responsible for handling invalid responses returned by downstream services, and
 * translating them into EarlybirdResponseExceptions.
 */
public class ServiceResponseValidationFilter
    extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {

  private final Map<EarlybirdRequestType, ServiceResponseValidator<EarlybirdResponse>>
      requestTypeToResponseValidators = new HashMap<>();
  private final EarlybirdCluster cluster;

  /**
   * Creates a new filter for handling invalid responses.
   */
  public ServiceResponseValidationFilter(EarlybirdCluster cluster) {
    this.cluster = cluster;

    ServiceResponseValidator<EarlybirdResponse> passThroughValidator =
        new PassThroughResponseValidator();

    requestTypeToResponseValidators
        .put(EarlybirdRequestType.FACETS, new FacetsResponseValidator(cluster));
    requestTypeToResponseValidators
        .put(EarlybirdRequestType.RECENCY, passThroughValidator);
    requestTypeToResponseValidators
        .put(EarlybirdRequestType.RELEVANCE, passThroughValidator);
    requestTypeToResponseValidators
        .put(EarlybirdRequestType.STRICT_RECENCY, passThroughValidator);
    requestTypeToResponseValidators
        .put(EarlybirdRequestType.TERM_STATS, new TermStatsResultsValidator(cluster));
    requestTypeToResponseValidators
        .put(EarlybirdRequestType.TOP_TWEETS, new TopTweetsResultsValidator(cluster));
  }

  @Override
  public Future<EarlybirdResponse> apply(
      final EarlybirdRequestContext requestContext,
      Service<EarlybirdRequestContext, EarlybirdResponse> service) {
    return service.apply(requestContext).flatMap(
        new Function<EarlybirdResponse, Future<EarlybirdResponse>>() {
          @Override
          public Future<EarlybirdResponse> apply(EarlybirdResponse response) {
            if (response == null) {
              return Future.exception(new IllegalStateException(
                  cluster + " returned null response"));
            }

            if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS) {
              return requestTypeToResponseValidators
                  .get(requestContext.getEarlybirdRequestType())
                  .validate(response);
            }

            return Future.value(EarlybirdResponseMergeUtil.transformInvalidResponse(
                response,
                String.format("Failure from %s (%s)", cluster, response.getResponseCode())));
          }
        });
  }
}
Binary file not shown.
@ -1,12 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;

public interface ServingRangeProvider {
  /**
   * Get a ServingRange implementation.
   * Usually backed by either TierInfoWrapper or RootClusterBoundaryInfo.
   */
  ServingRange getServingRange(EarlybirdRequestContext requestContext, boolean useBoundaryOverride);
}
Binary file not shown.
@ -1,30 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;

/**
 * A filter that will set the clientId of the request to the strato HttpEndpoint Attribution.
 * <p>
 * If the clientId is already set to something non-null then that value is used.
 * If the clientId is null but Attribution.httpEndpoint() contains a value it will be set as
 * the clientId.
 */
public class StratoAttributionClientIdFilter extends
    SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequest request, Service<EarlybirdRequest, EarlybirdResponse> service
  ) {
    if (request.getClientId() == null) {
      ClientIdUtil.getClientIdFromHttpEndpointAttribution().ifPresent(request::setClientId);
    }

    return service.apply(request);
  }
}
Binary file not shown.
@ -1,24 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;

/** A top level filter for handling exceptions. */
public class TopLevelExceptionHandlingFilter
    extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
  private final EarlybirdResponseExceptionHandler exceptionHandler;

  /** Creates a new TopLevelExceptionHandlingFilter instance. */
  public TopLevelExceptionHandlingFilter() {
    this.exceptionHandler = new EarlybirdResponseExceptionHandler("top_level");
  }

  @Override
  public Future<EarlybirdResponse> apply(EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service) {
    return exceptionHandler.handleException(request, service.apply(request));
  }
}
Binary file not shown.
@@ -1,30 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
import com.twitter.util.Future;

/**
 * A filter that unsets some request fields that make sense only on the SuperRoot, before sending
 * them to the individual roots.
 */
public class UnsetSuperRootFieldsFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
  private final boolean unsetFollowedUserIds;

  public UnsetSuperRootFieldsFilter() {
    this(true);
  }

  public UnsetSuperRootFieldsFilter(boolean unsetFollowedUserIds) {
    this.unsetFollowedUserIds = unsetFollowedUserIds;
  }

  @Override
  public Future<EarlybirdResponse> apply(EarlybirdRequest request,
                                         Service<EarlybirdRequest, EarlybirdResponse> service) {
    return service.apply(EarlybirdRequestUtil.unsetSuperRootFields(request, unsetFollowedUserIds));
  }
}
Binary file not shown.
@@ -1,44 +0,0 @@
package com.twitter.search.earlybird_root.filters;

import javax.inject.Inject;

import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;

public class VeryRecentTweetsFilter
    extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
  private static final String DECIDER_KEY = "enable_very_recent_tweets";
  private static final SearchRateCounter VERY_RECENT_TWEETS_NOT_MODIFIED =
      SearchRateCounter.export("very_recent_tweets_not_modified");
  private static final SearchRateCounter VERY_RECENT_TWEETS_ENABLED =
      SearchRateCounter.export("very_recent_tweets_enabled");

  private final SearchDecider decider;

  @Inject
  public VeryRecentTweetsFilter(
      SearchDecider decider
  ) {
    this.decider = decider;
  }

  @Override
  public Future<EarlybirdResponse> apply(
      EarlybirdRequest request,
      Service<EarlybirdRequest, EarlybirdResponse> service
  ) {
    if (decider.isAvailable(DECIDER_KEY)) {
      VERY_RECENT_TWEETS_ENABLED.increment();
      request.setSkipVeryRecentTweets(false);
    } else {
      VERY_RECENT_TWEETS_NOT_MODIFIED.increment();
    }

    return service.apply(request);
  }
}
Binary file not shown.
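The filters above are all stateless SimpleFilters over the same request/response types, so they compose directly. A hedged sketch of how such a chain could be assembled; the decider and downstreamService instances are assumed, and the ordering shown is illustrative rather than the actual production wiring:

// Sketch: chain the root-level request filters in front of a downstream service.
// Exceptions are handled outermost, then the decider-gated very-recent-tweets
// toggle runs, then SuperRoot-only fields are stripped before the call goes out.
Service<EarlybirdRequest, EarlybirdResponse> chain =
    new TopLevelExceptionHandlingFilter()
        .andThen(new VeryRecentTweetsFilter(decider))
        .andThen(new UnsetSuperRootFieldsFilter())
        .andThen(downstreamService);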
Binary file not shown.
@@ -1,176 +0,0 @@
package com.twitter.search.earlybird_root.mergers;

import java.util.List;
import java.util.Map;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.TierResponse;

/**
 * Collection of EarlybirdResponses and associated stats to be merged.
 */
public class AccumulatedResponses {
  // The list of the successful responses from all earlybird futures. This does not include empty
  // responses resulting from null requests.
  private final List<EarlybirdResponse> successResponses;
  // The list of the unsuccessful responses from all earlybird futures.
  private final List<EarlybirdResponse> errorResponses;
  // The list of max statusIds seen in each earlybird.
  private final List<Long> maxIds;
  // The list of min statusIds seen in each earlybird.
  private final List<Long> minIds;

  private final EarlyTerminationInfo mergedEarlyTerminationInfo;
  private final boolean isMergingAcrossTiers;
  private final PartitionCounts partitionCounts;
  private final int numSearchedSegments;

  public static final class PartitionCounts {
    private final int numPartitions;
    private final int numSuccessfulPartitions;
    private final List<TierResponse> perTierResponse;

    public PartitionCounts(int numPartitions, int numSuccessfulPartitions,
                           List<TierResponse> perTierResponse) {
      this.numPartitions = numPartitions;
      this.numSuccessfulPartitions = numSuccessfulPartitions;
      this.perTierResponse = perTierResponse;
    }

    public int getNumPartitions() {
      return numPartitions;
    }

    public int getNumSuccessfulPartitions() {
      return numSuccessfulPartitions;
    }

    public List<TierResponse> getPerTierResponse() {
      return perTierResponse;
    }
  }

  /**
   * Creates an AccumulatedResponses instance.
   */
  public AccumulatedResponses(List<EarlybirdResponse> successResponses,
                              List<EarlybirdResponse> errorResponses,
                              List<Long> maxIds,
                              List<Long> minIds,
                              EarlyTerminationInfo mergedEarlyTerminationInfo,
                              boolean isMergingAcrossTiers,
                              PartitionCounts partitionCounts,
                              int numSearchedSegments) {
    this.successResponses = successResponses;
    this.errorResponses = errorResponses;
    this.maxIds = maxIds;
    this.minIds = minIds;
    this.mergedEarlyTerminationInfo = mergedEarlyTerminationInfo;
    this.isMergingAcrossTiers = isMergingAcrossTiers;
    this.partitionCounts = partitionCounts;
    this.numSearchedSegments = numSearchedSegments;
  }

  public List<EarlybirdResponse> getSuccessResponses() {
    return successResponses;
  }

  public List<EarlybirdResponse> getErrorResponses() {
    return errorResponses;
  }

  public List<Long> getMaxIds() {
    return maxIds;
  }

  public List<Long> getMinIds() {
    return minIds;
  }

  public EarlyTerminationInfo getMergedEarlyTerminationInfo() {
    return mergedEarlyTerminationInfo;
  }

  public boolean foundError() {
    return !errorResponses.isEmpty();
  }

  /**
   * Tries to return a merged EarlybirdResponse that propagates as much information from the error
   * responses as possible.
   *
   * If all error responses have the same error response code, the merged response will have the
   * same error response code, and the debugString/debugInfo on the merged response will be set to
   * the debugString/debugInfo of one of the merged responses.
   *
   * If the error responses have at least 2 different response codes, TRANSIENT_ERROR will be set
   * on the merged response. Also, we will look for the most common error response code, and will
   * propagate the debugString/debugInfo from an error response with that response code.
   */
  public EarlybirdResponse getMergedErrorResponse() {
    Preconditions.checkState(!errorResponses.isEmpty());

    // Find a response that has the most common error response code.
    int maxCount = 0;
    EarlybirdResponse errorResponseWithMostCommonErrorResponseCode = null;
    Map<EarlybirdResponseCode, Integer> responseCodeCounts = Maps.newHashMap();
    for (EarlybirdResponse errorResponse : errorResponses) {
      EarlybirdResponseCode responseCode = errorResponse.getResponseCode();
      Integer responseCodeCount = responseCodeCounts.get(responseCode);
      if (responseCodeCount == null) {
        responseCodeCount = 0;
      }
      ++responseCodeCount;
      responseCodeCounts.put(responseCode, responseCodeCount);
      if (responseCodeCount > maxCount) {
        maxCount = responseCodeCount;
        errorResponseWithMostCommonErrorResponseCode = errorResponse;
      }
    }

    // If all error responses have the same response code, set it on the merged response.
    // Otherwise, set TRANSIENT_ERROR on the merged response.
    EarlybirdResponseCode mergedResponseCode = EarlybirdResponseCode.TRANSIENT_ERROR;
    if (responseCodeCounts.size() == 1) {
      mergedResponseCode = responseCodeCounts.keySet().iterator().next();
    }

    EarlybirdResponse mergedResponse = new EarlybirdResponse()
        .setResponseCode(mergedResponseCode);

    // Propagate the debugString/debugInfo of the selected error response to the merged response.
    Preconditions.checkNotNull(errorResponseWithMostCommonErrorResponseCode);
    if (errorResponseWithMostCommonErrorResponseCode.isSetDebugString()) {
      mergedResponse.setDebugString(errorResponseWithMostCommonErrorResponseCode.getDebugString());
    }
    if (errorResponseWithMostCommonErrorResponseCode.isSetDebugInfo()) {
      mergedResponse.setDebugInfo(errorResponseWithMostCommonErrorResponseCode.getDebugInfo());
    }

    // Set the numPartitions and numSuccessfulPartitions on the mergedResponse.
    mergedResponse.setNumPartitions(partitionCounts.getNumPartitions());
    mergedResponse.setNumSuccessfulPartitions(partitionCounts.getNumSuccessfulPartitions());

    return mergedResponse;
  }

  public boolean isMergingAcrossTiers() {
    return isMergingAcrossTiers;
  }

  public boolean isMergingPartitionsWithinATier() {
    return !isMergingAcrossTiers;
  }

  public PartitionCounts getPartitionCounts() {
    return partitionCounts;
  }

  public int getNumSearchedSegments() {
    return numSearchedSegments;
  }
}
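To make the error-merging policy of getMergedErrorResponse concrete, here is a small hedged sketch; apart from the error list, the constructor arguments are placeholders for fields the example does not exercise, and the one-argument EarlyTerminationInfo constructor is an assumption about the generated thrift class:

// Sketch: two partitions fail with different response codes.
List<EarlybirdResponse> errors = Arrays.asList(
    new EarlybirdResponse(EarlybirdResponseCode.SERVER_TIMEOUT_ERROR, 0),
    new EarlybirdResponse(EarlybirdResponseCode.PERSISTENT_ERROR, 0));
AccumulatedResponses accumulated = new AccumulatedResponses(
    Collections.emptyList(),         // no successful responses
    errors,
    Collections.emptyList(),         // maxIds, unused here
    Collections.emptyList(),         // minIds, unused here
    new EarlyTerminationInfo(false), // assumed thrift constructor, not exercised here
    false,                           // merging partitions within a tier
    new AccumulatedResponses.PartitionCounts(2, 0, Collections.emptyList()),
    0);
// Two distinct response codes were seen, so the merged code falls back to
// TRANSIENT_ERROR, and the partition counts (2 total, 0 successful) are copied over.
EarlybirdResponse merged = accumulated.getMergedErrorResponse();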
@@ -1,26 +0,0 @@
java_library(
    sources = ["*.java"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/guava",
        "3rdparty/jvm/log4j",
        "3rdparty/jvm/org/slf4j:slf4j-api",
        "src/java/com/twitter/common/collections",
        "src/java/com/twitter/common/quantity",
        "src/java/com/twitter/search/common/futures",
        "src/java/com/twitter/search/common/logging",
        "src/java/com/twitter/search/common/metrics",
        "src/java/com/twitter/search/common/partitioning/snowflakeparser",
        "src/java/com/twitter/search/common/relevance:utils",
        "src/java/com/twitter/search/common/schema/earlybird",
        "src/java/com/twitter/search/common/search",
        "src/java/com/twitter/search/common/util:finagleutil",
        "src/java/com/twitter/search/common/util/earlybird",
        "src/java/com/twitter/search/earlybird_root/collectors",
        "src/java/com/twitter/search/earlybird_root/common",
        "src/java/com/twitter/search/queryparser/query:core-query-nodes",
        "src/thrift/com/twitter/search:earlybird-java",
        "src/thrift/com/twitter/search/common:query-java",
    ],
)
BIN src/java/com/twitter/search/earlybird_root/mergers/BUILD.docx Normal file
Binary file not shown.
@@ -1,9 +0,0 @@
package com.twitter.search.earlybird_root.mergers;

public interface EarlyTerminateTierMergePredicate {
  /**
   * Do we have enough results so far that we can early terminate and not continue onto next tier?
   */
  boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
                                        boolean foundEarlyTermination);
}
Binary file not shown.
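A minimal sketch of an implementation of this predicate, mirroring the conservative default that the response merger further down uses (stop descending to older tiers as soon as any result or an early-termination signal has been seen); the class name is illustrative only:

// Illustrative only: terminate tier merging once the newer tiers produced anything.
public class ConservativeTierMergePredicate implements EarlyTerminateTierMergePredicate {
  @Override
  public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
                                               boolean foundEarlyTermination) {
    return foundEarlyTermination || totalResultsFromSuccessfulShards >= 1;
  }
}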
@ -1,176 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.base.Function;
|
|
||||||
import com.google.common.base.Joiner;
|
|
||||||
import com.google.common.collect.Iterables;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.twitter.search.common.logging.DebugMessageBuilder;
|
|
||||||
import com.twitter.search.common.metrics.SearchCounter;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Collects debug messages to attach to EarlybirdResponse
|
|
||||||
*/
|
|
||||||
class EarlybirdResponseDebugMessageBuilder {
|
|
||||||
private static final Logger LOG =
|
|
||||||
LoggerFactory.getLogger(EarlybirdResponseDebugMessageBuilder.class);
|
|
||||||
|
|
||||||
private static final Logger TOO_MANY_FAILED_PARTITIONS_LOG =
|
|
||||||
LoggerFactory.getLogger(String.format("%s_too_many_failed_partitions",
|
|
||||||
EarlybirdResponseDebugMessageBuilder.class.getName()));
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
protected final SearchCounter insufficientValidResponseCounter =
|
|
||||||
SearchCounter.export("insufficient_valid_partition_responses_count");
|
|
||||||
@VisibleForTesting
|
|
||||||
protected final SearchCounter validPartitionResponseCounter =
|
|
||||||
SearchCounter.export("valid_partition_response_count");
|
|
||||||
|
|
||||||
// the combined debug string for all earlybird responses
|
|
||||||
private final StringBuilder debugString;
|
|
||||||
/**
|
|
||||||
* A message builder backed by the same {@link #debugString} above.
|
|
||||||
*/
|
|
||||||
private final DebugMessageBuilder debugMessageBuilder;
|
|
||||||
|
|
||||||
private static final Joiner JOINER = Joiner.on(", ");
|
|
||||||
|
|
||||||
EarlybirdResponseDebugMessageBuilder(EarlybirdRequest request) {
|
|
||||||
this(getDebugLevel(request));
|
|
||||||
}
|
|
||||||
|
|
||||||
EarlybirdResponseDebugMessageBuilder(DebugMessageBuilder.Level level) {
|
|
||||||
this.debugString = new StringBuilder();
|
|
||||||
this.debugMessageBuilder = new DebugMessageBuilder(debugString, level);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static DebugMessageBuilder.Level getDebugLevel(EarlybirdRequest request) {
|
|
||||||
if (request.isSetDebugMode() && request.getDebugMode() > 0) {
|
|
||||||
return DebugMessageBuilder.getDebugLevel(request.getDebugMode());
|
|
||||||
} else if (request.isSetDebugOptions()) {
|
|
||||||
return DebugMessageBuilder.Level.DEBUG_BASIC;
|
|
||||||
} else {
|
|
||||||
return DebugMessageBuilder.Level.DEBUG_NONE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected boolean isDebugMode() {
|
|
||||||
return debugMessageBuilder.getDebugLevel() > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void append(String msg) {
|
|
||||||
debugString.append(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void debugAndLogWarning(String msg) {
|
|
||||||
if (isDebugMode()) {
|
|
||||||
debugString.append(msg).append('\n');
|
|
||||||
}
|
|
||||||
LOG.warn(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void debugDetailed(String format, Object... args) {
|
|
||||||
debugAtLevel(DebugMessageBuilder.Level.DEBUG_DETAILED, format, args);
|
|
||||||
}
|
|
||||||
|
|
||||||
void debugVerbose(String format, Object... args) {
|
|
||||||
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE, format, args);
|
|
||||||
}
|
|
||||||
|
|
||||||
void debugVerbose2(String format, Object... args) {
|
|
||||||
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE_2, format, args);
|
|
||||||
}
|
|
||||||
|
|
||||||
void debugAtLevel(DebugMessageBuilder.Level level, String format, Object... args) {
|
|
||||||
boolean levelOK = debugMessageBuilder.isAtLeastLevel(level);
|
|
||||||
if (levelOK || LOG.isDebugEnabled()) {
|
|
||||||
// We check both modes here in order to build the formatted message only once.
|
|
||||||
String message = String.format(format, args);
|
|
||||||
|
|
||||||
LOG.debug(message);
|
|
||||||
|
|
||||||
if (levelOK) {
|
|
||||||
debugString.append(message).append('\n');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
String debugString() {
|
|
||||||
return debugString.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
DebugMessageBuilder getDebugMessageBuilder() {
|
|
||||||
return debugMessageBuilder;
|
|
||||||
}
|
|
||||||
|
|
||||||
void logBelowSuccessThreshold(ThriftSearchQuery searchQuery, int numSuccessResponses,
|
|
||||||
int numPartitions, double successThreshold) {
|
|
||||||
String rawQuery = (searchQuery != null && searchQuery.isSetRawQuery())
|
|
||||||
? "[" + searchQuery.getRawQuery() + "]" : "null";
|
|
||||||
String serializedQuery = (searchQuery != null && searchQuery.isSetSerializedQuery())
|
|
||||||
? "[" + searchQuery.getSerializedQuery() + "]" : "null";
|
|
||||||
// Not enough successful responses from partitions.
|
|
||||||
String errorMessage = String.format(
|
|
||||||
"Only %d valid responses returned out of %d partitions for raw query: %s"
|
|
||||||
+ " serialized query: %s. Lower than threshold of %s",
|
|
||||||
numSuccessResponses, numPartitions, rawQuery, serializedQuery, successThreshold);
|
|
||||||
|
|
||||||
TOO_MANY_FAILED_PARTITIONS_LOG.warn(errorMessage);
|
|
||||||
|
|
||||||
insufficientValidResponseCounter.increment();
|
|
||||||
validPartitionResponseCounter.add(numSuccessResponses);
|
|
||||||
debugString.append(errorMessage);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
void logResponseDebugInfo(EarlybirdRequest earlybirdRequest,
|
|
||||||
String partitionTierName,
|
|
||||||
EarlybirdResponse response) {
|
|
||||||
if (response.isSetDebugString() && !response.getDebugString().isEmpty()) {
|
|
||||||
debugString.append(String.format("Received response from [%s] with debug string [%s]",
|
|
||||||
partitionTierName, response.getDebugString())).append("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!response.isSetResponseCode()) {
|
|
||||||
debugAndLogWarning(String.format(
|
|
||||||
"Received Earlybird null response code for query [%s] from [%s]",
|
|
||||||
earlybirdRequest, partitionTierName));
|
|
||||||
} else if (response.getResponseCode() != EarlybirdResponseCode.SUCCESS
|
|
||||||
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED
|
|
||||||
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_DISABLED
|
|
||||||
&& response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED) {
|
|
||||||
debugAndLogWarning(String.format(
|
|
||||||
"Received Earlybird response error [%s] for query [%s] from [%s]",
|
|
||||||
response.getResponseCode(), earlybirdRequest, partitionTierName));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (debugMessageBuilder.isVerbose2()) {
|
|
||||||
debugVerbose2("Earlybird [%s] returned response: %s", partitionTierName, response);
|
|
||||||
} else if (debugMessageBuilder.isVerbose()) {
|
|
||||||
if (response.isSetSearchResults() && response.getSearchResults().getResultsSize() > 0) {
|
|
||||||
String ids = JOINER.join(Iterables.transform(
|
|
||||||
response.getSearchResults().getResults(),
|
|
||||||
new Function<ThriftSearchResult, Long>() {
|
|
||||||
@Nullable
|
|
||||||
@Override
|
|
||||||
public Long apply(ThriftSearchResult result) {
|
|
||||||
return result.getId();
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
debugVerbose("Earlybird [%s] returned TweetIDs: %s", partitionTierName, ids);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
Binary file not shown.
@ -1,604 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import scala.runtime.BoxedUnit;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.base.Optional;
|
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
import com.google.common.collect.ImmutableList;
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
import com.google.common.collect.Sets;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.twitter.search.common.metrics.SearchCounter;
|
|
||||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
|
||||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
|
||||||
import com.twitter.search.common.util.FinagleUtil;
|
|
||||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
|
|
||||||
import com.twitter.search.common.util.earlybird.ResultsUtil;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
|
||||||
import com.twitter.search.earlybird_root.collectors.MultiwayMergeCollector;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
|
|
||||||
import com.twitter.util.Function;
|
|
||||||
import com.twitter.util.Future;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Base EarlybirdResponseMerger containing basic logic to merge EarlybirdResponse objects
|
|
||||||
*/
|
|
||||||
public abstract class EarlybirdResponseMerger implements EarlyTerminateTierMergePredicate {
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdResponseMerger.class);
|
|
||||||
private static final Logger MIN_SEARCHED_STATUS_ID_LOGGER =
|
|
||||||
LoggerFactory.getLogger("MinSearchedStatusIdLogger");
|
|
||||||
|
|
||||||
private static final SearchCounter NO_SEARCH_RESULT_COUNTER =
|
|
||||||
SearchCounter.export("no_search_result_count");
|
|
||||||
private static final SearchCounter NO_RESPONSES_TO_MERGE =
|
|
||||||
SearchCounter.export("no_responses_to_merge");
|
|
||||||
private static final SearchCounter EARLYBIRD_RESPONSE_NO_MORE_RESULTS =
|
|
||||||
SearchCounter.export("merger_earlybird_response_no_more_results");
|
|
||||||
private static final String PARTITION_OR_TIER_COUNTER_NAME_FORMAT =
|
|
||||||
"merger_waited_for_response_from_%s_counter";
|
|
||||||
private static final String PARTITION_OR_TIER_ERROR_COUNTER_NAME_FORMAT =
|
|
||||||
"merger_num_error_responses_from_%s";
|
|
||||||
private static final String PARTITION_OR_TIER_RESPONSE_CODE_COUNTER_NAME_FORMAT =
|
|
||||||
"merger_earlybird_response_code_from_%s_%s";
|
|
||||||
|
|
||||||
protected final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
|
|
||||||
protected final EarlybirdRequestContext requestContext;
|
|
||||||
protected final ImmutableList<Future<EarlybirdResponse>> responses;
|
|
||||||
protected AccumulatedResponses accumulatedResponses;
|
|
||||||
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
static final Map<EarlybirdRequestType, SearchCounter> MERGER_CREATED_STATS =
|
|
||||||
perRequestTypeCounterImmutableMap("earlybird_response_merger_%s_created_count");
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
static final Map<EarlybirdRequestType, SearchCounter>
|
|
||||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_MAX_ID = perRequestTypeCounterImmutableMap(
|
|
||||||
"merger_%s_min_searched_status_id_larger_than_request_max_id");
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
static final Map<EarlybirdRequestType, SearchCounter>
|
|
||||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_UNTIL_TIME = perRequestTypeCounterImmutableMap(
|
|
||||||
"merger_%s_min_searched_status_id_larger_than_request_until_time");
|
|
||||||
|
|
||||||
private static Map<EarlybirdRequestType, SearchCounter> perRequestTypeCounterImmutableMap(
|
|
||||||
String statPattern) {
|
|
||||||
Map<EarlybirdRequestType, SearchCounter> statsMap = Maps.newEnumMap(EarlybirdRequestType.class);
|
|
||||||
for (EarlybirdRequestType earlybirdRequestType : EarlybirdRequestType.values()) {
|
|
||||||
String statName = String.format(statPattern, earlybirdRequestType.getNormalizedName());
|
|
||||||
statsMap.put(earlybirdRequestType, SearchCounter.export(statName));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Maps.immutableEnumMap(statsMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final com.google.common.base.Function<EarlybirdResponse, Map<Long, Integer>>
|
|
||||||
HIT_COUNT_GETTER =
|
|
||||||
response -> response.getSearchResults() == null
|
|
||||||
? null
|
|
||||||
: response.getSearchResults().getHitCounts();
|
|
||||||
|
|
||||||
private final ChainMerger chainMerger;
|
|
||||||
|
|
||||||
private class ChainMerger {
|
|
||||||
private final EarlybirdRequestContext requestContext;
|
|
||||||
private final ResponseAccumulator responseAccumulator;
|
|
||||||
private final List<Future<EarlybirdResponse>> responses;
|
|
||||||
private final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
|
|
||||||
private int currentFutureIndex = -1;
|
|
||||||
|
|
||||||
public ChainMerger(EarlybirdRequestContext requestContext,
|
|
||||||
ResponseAccumulator responseAccumulator,
|
|
||||||
List<Future<EarlybirdResponse>> responses,
|
|
||||||
EarlybirdResponseDebugMessageBuilder responseMessageBuilder) {
|
|
||||||
this.requestContext = requestContext;
|
|
||||||
this.responseAccumulator = responseAccumulator;
|
|
||||||
this.responses = responses;
|
|
||||||
this.responseMessageBuilder = responseMessageBuilder;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Future<EarlybirdResponse> merge() {
|
|
||||||
// 'responseFutures' should always be sorted.
|
|
||||||
// When returned by EarlybirdScatterGather service, the responses are sorted by partition ID.
|
|
||||||
// When returned by EarlybirdChainedScatterGatherService,
|
|
||||||
// responses are sorted descending by tier start date. See:
|
|
||||||
// com.twitter.search.earlybird_root.EarlybirdChainedScatterGatherService.TIER_COMPARATOR.
|
|
||||||
//
|
|
||||||
// When merging responses from partitions, we want to wait for responses from all partitions,
|
|
||||||
// so the order in which we wait for those results does not matter. When merging responses
|
|
||||||
// from tiers, we want to wait for the response from the latest. If we don't need any more
|
|
||||||
// responses to compute the final response, then we don't need to wait for the responses from
|
|
||||||
// other tiers. If we cannot terminate early, then we want to wait for the responses from the
|
|
||||||
// second tier, and so on.
|
|
||||||
//
|
|
||||||
// We do not need to have any explicit synchronization, because:
|
|
||||||
// 1. The callbacks for future_i are set by the flatMap() callback on future_{i-1} (when
|
|
||||||
// recursively calling merge() inside the flatMap()).
|
|
||||||
// 2. Before setting the callbacks on future_i, future_{i-1}.flatMap() adds the response
|
|
||||||
// results to mergeHelper.
|
|
||||||
// 3. When the callbacks on future_i are set, the memory barrier between
|
|
||||||
// thread_running_future_{i-1} and thread_running_future_i is crossed. This guarantees
|
|
||||||
// that thread_running_future_i will see the updates to mergeHelper before it sees the
|
|
||||||
// callbacks. (Or thread_running_future_{i-1} == thread_running_future_i, in which case
|
|
||||||
// synchronization is not an issue, and correctness is guarateed by the order in which
|
|
||||||
// things will run.)
|
|
||||||
// 4. The same reasoning applies to currentFutureIndex.
|
|
||||||
|
|
||||||
++currentFutureIndex;
|
|
||||||
if (currentFutureIndex >= responses.size()) {
|
|
||||||
return Future.value(getTimedMergedResponse(responseAccumulator.getAccumulatedResults()));
|
|
||||||
}
|
|
||||||
|
|
||||||
final String partitionTierName =
|
|
||||||
responseAccumulator.getNameForLogging(currentFutureIndex, responses.size());
|
|
||||||
final String nameForEarlybirdResponseCodeStats =
|
|
||||||
responseAccumulator.getNameForEarlybirdResponseCodeStats(
|
|
||||||
currentFutureIndex, responses.size());
|
|
||||||
|
|
||||||
// If a tier in the chain throws an exception, convert it to a null response, and let the
|
|
||||||
// mergeHelper handle it appropriately.
|
|
||||||
return responses.get(currentFutureIndex)
|
|
||||||
.handle(Function.func(t -> {
|
|
||||||
if (FinagleUtil.isCancelException(t)) {
|
|
||||||
return new EarlybirdResponse()
|
|
||||||
.setResponseCode(EarlybirdResponseCode.CLIENT_CANCEL_ERROR);
|
|
||||||
} else if (FinagleUtil.isTimeoutException(t)) {
|
|
||||||
return new EarlybirdResponse()
|
|
||||||
.setResponseCode(EarlybirdResponseCode.SERVER_TIMEOUT_ERROR);
|
|
||||||
} else {
|
|
||||||
SearchCounter.export(
|
|
||||||
String.format(PARTITION_OR_TIER_ERROR_COUNTER_NAME_FORMAT, partitionTierName))
|
|
||||||
.increment();
|
|
||||||
if (responseMessageBuilder.isDebugMode()) {
|
|
||||||
responseMessageBuilder.debugAndLogWarning(
|
|
||||||
String.format("[%s] failed, exception [%s]",
|
|
||||||
partitionTierName, t.toString()));
|
|
||||||
}
|
|
||||||
LOG.warn("exception response from: " + partitionTierName, t);
|
|
||||||
return new EarlybirdResponse()
|
|
||||||
.setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
|
|
||||||
}
|
|
||||||
}))
|
|
||||||
.flatMap(Function.func(response -> {
|
|
||||||
Preconditions.checkNotNull(response);
|
|
||||||
|
|
||||||
SearchCounter.export(
|
|
||||||
String.format(PARTITION_OR_TIER_RESPONSE_CODE_COUNTER_NAME_FORMAT,
|
|
||||||
nameForEarlybirdResponseCodeStats,
|
|
||||||
response.getResponseCode().name().toLowerCase()))
|
|
||||||
.increment();
|
|
||||||
|
|
||||||
if ((response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED)
|
|
||||||
&& (response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
|
|
||||||
SearchCounter.export(
|
|
||||||
String.format(PARTITION_OR_TIER_COUNTER_NAME_FORMAT, partitionTierName))
|
|
||||||
.increment();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
|
|
||||||
// the request has been cancelled, no need to proceed
|
|
||||||
return Future.value(response);
|
|
||||||
}
|
|
||||||
|
|
||||||
rewriteResponseCodeIfSearchResultsMissing(requestContext, partitionTierName, response);
|
|
||||||
responseMessageBuilder.logResponseDebugInfo(
|
|
||||||
requestContext.getRequest(),
|
|
||||||
partitionTierName,
|
|
||||||
response);
|
|
||||||
responseAccumulator.addResponse(
|
|
||||||
responseMessageBuilder,
|
|
||||||
requestContext.getRequest(),
|
|
||||||
response);
|
|
||||||
|
|
||||||
if (responseAccumulator.shouldEarlyTerminateMerge(EarlybirdResponseMerger.this)) {
|
|
||||||
return Future.value(getTimedMergedResponse(
|
|
||||||
responseAccumulator.getAccumulatedResults()));
|
|
||||||
}
|
|
||||||
return merge();
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void rewriteResponseCodeIfSearchResultsMissing(
|
|
||||||
EarlybirdRequestContext earlybirdRequestContext,
|
|
||||||
String partitionTierName,
|
|
||||||
EarlybirdResponse response) {
|
|
||||||
// We always require searchResults to be set, even for term stats and facet requests.
|
|
||||||
// This is because searchResults contains important info such as pagination cursors
|
|
||||||
// like minSearchStatusId and minSearchedTimeSinceEpoch.
|
|
||||||
// We expect all successful responses to have searchResults set.
|
|
||||||
if (response.isSetResponseCode()
|
|
||||||
&& response.getResponseCode() == EarlybirdResponseCode.SUCCESS
|
|
||||||
&& response.getSearchResults() == null) {
|
|
||||||
NO_SEARCH_RESULT_COUNTER.increment();
|
|
||||||
LOG.warn("Received Earlybird response with null searchResults from [{}]"
|
|
||||||
+ " EarlybirdRequest [{}] EarlybirdResponse [{}] ",
|
|
||||||
partitionTierName, earlybirdRequestContext.getRequest(), response);
|
|
||||||
response.setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Construct a EarlybirdResponseMerger to merge responses from multiple partitions or tiers
|
|
||||||
* based on mode.
|
|
||||||
*/
|
|
||||||
EarlybirdResponseMerger(EarlybirdRequestContext requestContext,
|
|
||||||
List<Future<EarlybirdResponse>> responses,
|
|
||||||
ResponseAccumulator responseAccumulator) {
|
|
||||||
this.requestContext = requestContext;
|
|
||||||
this.responses = ImmutableList.copyOf(responses);
|
|
||||||
this.responseMessageBuilder =
|
|
||||||
new EarlybirdResponseDebugMessageBuilder(requestContext.getRequest());
|
|
||||||
this.chainMerger = new ChainMerger(requestContext, responseAccumulator, responses,
|
|
||||||
responseMessageBuilder);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a response merger to merge the given responses.
|
|
||||||
*/
|
|
||||||
public static EarlybirdResponseMerger getResponseMerger(
|
|
||||||
EarlybirdRequestContext requestContext,
|
|
||||||
List<Future<EarlybirdResponse>> responses,
|
|
||||||
ResponseAccumulator helper,
|
|
||||||
EarlybirdCluster cluster,
|
|
||||||
EarlybirdFeatureSchemaMerger featureSchemaMerger,
|
|
||||||
int numPartitions) {
|
|
||||||
EarlybirdRequestType type = requestContext.getEarlybirdRequestType();
|
|
||||||
MERGER_CREATED_STATS.get(type).increment();
|
|
||||||
switch (type) {
|
|
||||||
case FACETS:
|
|
||||||
return new FacetResponseMerger(requestContext, responses, helper);
|
|
||||||
case TERM_STATS:
|
|
||||||
return new TermStatisticsResponseMerger(requestContext, responses, helper);
|
|
||||||
case RECENCY:
|
|
||||||
return new RecencyResponseMerger(requestContext, responses, helper, featureSchemaMerger);
|
|
||||||
case STRICT_RECENCY:
|
|
||||||
return new StrictRecencyResponseMerger(
|
|
||||||
requestContext, responses, helper, featureSchemaMerger, cluster);
|
|
||||||
case RELEVANCE:
|
|
||||||
return new RelevanceResponseMerger(
|
|
||||||
requestContext, responses, helper, featureSchemaMerger, numPartitions);
|
|
||||||
case TOP_TWEETS:
|
|
||||||
return new TopTweetsResponseMerger(requestContext, responses, helper);
|
|
||||||
default:
|
|
||||||
throw new RuntimeException("EarlybirdRequestType " + type + "is not supported by merge");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method can perform two types of merges:
|
|
||||||
* 1. merge responses within a tier from different partitions.
|
|
||||||
* 2. merge responses from multiple tiers.
|
|
||||||
*/
|
|
||||||
public final Future<EarlybirdResponse> merge() {
|
|
||||||
return chainMerger.merge()
|
|
||||||
.onSuccess(checkMinSearchedStatusIdFunction(
|
|
||||||
"max_id",
|
|
||||||
EarlybirdRequestUtil.getRequestMaxId(requestContext.getParsedQuery()),
|
|
||||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_MAX_ID.get(
|
|
||||||
requestContext.getEarlybirdRequestType())))
|
|
||||||
.onSuccess(checkMinSearchedStatusIdFunction(
|
|
||||||
"until_time",
|
|
||||||
EarlybirdRequestUtil.getRequestMaxIdFromUntilTime(requestContext.getParsedQuery()),
|
|
||||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_UNTIL_TIME.get(
|
|
||||||
requestContext.getEarlybirdRequestType())));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the function that checks if the minSearchedStatusID on the merged response is higher
|
|
||||||
* than the max ID in the request.
|
|
||||||
*/
|
|
||||||
private Function<EarlybirdResponse, BoxedUnit> checkMinSearchedStatusIdFunction(
|
|
||||||
final String operator, final Optional<Long> requestMaxId, final SearchCounter stat) {
|
|
||||||
return Function.cons(mergedResponse -> {
|
|
||||||
if (requestMaxId.isPresent()
|
|
||||||
&& requestMaxId.get() != Long.MAX_VALUE
|
|
||||||
&& (mergedResponse.getResponseCode() == EarlybirdResponseCode.SUCCESS)
|
|
||||||
&& mergedResponse.isSetSearchResults()
|
|
||||||
&& mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
|
|
||||||
long minSearchedStatusId = mergedResponse.getSearchResults().getMinSearchedStatusID();
|
|
||||||
// We sometimes set minSearchedStatusId = max_id + 1 when a request times out even
|
|
||||||
// before any search happens.
|
|
||||||
// Check SEARCH-10134 for more details.
|
|
||||||
if (minSearchedStatusId > requestMaxId.get() + 1) {
|
|
||||||
stat.increment();
|
|
||||||
String logMessage = "Response has a minSearchedStatusID ({}) larger than request "
|
|
||||||
+ operator + " ({})."
|
|
||||||
+ "\nrequest type: {}"
|
|
||||||
+ "\nrequest: {}"
|
|
||||||
+ "\nmerged response: {}"
|
|
||||||
+ "\nSuccessful accumulated responses:";
|
|
||||||
List<Object> logMessageParams = Lists.newArrayList();
|
|
||||||
logMessageParams.add(minSearchedStatusId);
|
|
||||||
logMessageParams.add(requestMaxId.get());
|
|
||||||
logMessageParams.add(requestContext.getEarlybirdRequestType());
|
|
||||||
logMessageParams.add(requestContext.getRequest());
|
|
||||||
logMessageParams.add(mergedResponse);
|
|
||||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
|
||||||
logMessage += "\naccumulated response: {}";
|
|
||||||
logMessageParams.add(response);
|
|
||||||
}
|
|
||||||
MIN_SEARCHED_STATUS_ID_LOGGER.warn(logMessage, logMessageParams.toArray());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private EarlybirdResponse getTimedMergedResponse(AccumulatedResponses accResponses) {
|
|
||||||
long start = System.nanoTime();
|
|
||||||
try {
|
|
||||||
return getMergedResponse(accResponses);
|
|
||||||
} finally {
|
|
||||||
long totalTime = System.nanoTime() - start;
|
|
||||||
getMergedResponseTimer().timerIncrement(totalTime);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private EarlybirdResponse initializeMergedSuccessResponseFromAccumulatedResponses() {
|
|
||||||
EarlybirdResponse mergedResponse = new EarlybirdResponse();
|
|
||||||
|
|
||||||
AccumulatedResponses.PartitionCounts partitionCounts =
|
|
||||||
accumulatedResponses.getPartitionCounts();
|
|
||||||
|
|
||||||
mergedResponse.setNumPartitions(partitionCounts.getNumPartitions())
|
|
||||||
.setNumSuccessfulPartitions(partitionCounts.getNumSuccessfulPartitions())
|
|
||||||
.setPerTierResponse(partitionCounts.getPerTierResponse())
|
|
||||||
.setNumSearchedSegments(accumulatedResponses.getNumSearchedSegments());
|
|
||||||
|
|
||||||
mergedResponse.setEarlyTerminationInfo(accumulatedResponses.getMergedEarlyTerminationInfo());
|
|
||||||
mergedResponse.setResponseCode(EarlybirdResponseCode.SUCCESS);
|
|
||||||
|
|
||||||
return mergedResponse;
|
|
||||||
}
|
|
||||||
|
|
||||||
private EarlybirdResponse getMergedResponse(AccumulatedResponses accResponses) {
|
|
||||||
accumulatedResponses = accResponses;
|
|
||||||
EarlybirdResponse mergedResponse;
|
|
||||||
|
|
||||||
if (accumulatedResponses.getSuccessResponses().isEmpty()
|
|
||||||
&& !accumulatedResponses.foundError()) {
|
|
||||||
// No successful or error responses. This means that all tiers / partitions are intentionally
|
|
||||||
// skipped. Return a blank successful response.
|
|
||||||
NO_RESPONSES_TO_MERGE.increment();
|
|
||||||
mergedResponse = new EarlybirdResponse()
|
|
||||||
.setResponseCode(EarlybirdResponseCode.SUCCESS)
|
|
||||||
.setSearchResults(new ThriftSearchResults())
|
|
||||||
.setDebugString("No responses to merge, probably because all tiers/partitions "
|
|
||||||
+ "were skipped.");
|
|
||||||
} else if (accumulatedResponses.isMergingAcrossTiers()) {
|
|
||||||
mergedResponse = getMergedResponseAcrossTiers();
|
|
||||||
} else {
|
|
||||||
mergedResponse = getMergedResponseAcrossPartitions();
|
|
||||||
}
|
|
||||||
|
|
||||||
saveMergedDebugString(mergedResponse);
|
|
||||||
return mergedResponse;
|
|
||||||
}
|
|
||||||
|
|
||||||
private EarlybirdResponse getMergedResponseAcrossTiers() {
|
|
||||||
Preconditions.checkState(
|
|
||||||
!accumulatedResponses.getSuccessResponses().isEmpty()
|
|
||||||
|| accumulatedResponses.foundError());
|
|
||||||
|
|
||||||
// When merging across tiers, if we have one failed tier, we should fail the whole
|
|
||||||
// response. Note that due to early termination, if a tier that is old fails
|
|
||||||
// but the newer tiers return enough results, the failed tier won't show up
|
|
||||||
// here in accumulatedResponses -- the only tiers that show up here
|
|
||||||
// will be successful.
|
|
||||||
if (accumulatedResponses.foundError()) {
|
|
||||||
// The TierResponseAccumulator early terminates on the first error, so we should
|
|
||||||
// never get more than one error. This means that the getMergedErrorResponse will
|
|
||||||
// return an error response with the error code of that one error, and will never
|
|
||||||
// have to decide which error response to return if the error responses are all
|
|
||||||
// different.
|
|
||||||
|
|
||||||
// Perhaps we should just return accumulatedResponses.getErrorResponses().get(0);
|
|
||||||
Preconditions.checkState(accumulatedResponses.getErrorResponses().size() == 1);
|
|
||||||
return accumulatedResponses.getMergedErrorResponse();
|
|
||||||
} else {
|
|
||||||
EarlybirdResponse mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
|
|
||||||
return internalMerge(mergedResponse);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private EarlybirdResponse getMergedResponseAcrossPartitions() {
|
|
||||||
Preconditions.checkState(
|
|
||||||
!accumulatedResponses.getSuccessResponses().isEmpty()
|
|
||||||
|| accumulatedResponses.foundError());
|
|
||||||
|
|
||||||
EarlybirdResponse mergedResponse;
|
|
||||||
|
|
||||||
// Unlike tier merging, one failed response doesn't mean the merged response should
|
|
||||||
// fail. If we have successful responses we can check the success ratio and if its
|
|
||||||
// good we can still return a successful merge.
|
|
||||||
if (!accumulatedResponses.getSuccessResponses().isEmpty()) {
|
|
||||||
// We have at least one successful response, but still need to check the success ratio.
|
|
||||||
// mergedResponse is a SUCCESS response after this call, but we will
|
|
||||||
// set it to failure below if necessary.
|
|
||||||
mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
|
|
||||||
|
|
||||||
int numSuccessResponses = mergedResponse.getNumSuccessfulPartitions();
|
|
||||||
int numPartitions = mergedResponse.getNumPartitions();
|
|
||||||
double successThreshold = getSuccessResponseThreshold();
|
|
||||||
if (checkSuccessPartitionRatio(numSuccessResponses, numPartitions, successThreshold)) {
|
|
||||||
// Success! Proceed with merging.
|
|
||||||
mergedResponse.setResponseCode(EarlybirdResponseCode.SUCCESS);
|
|
||||||
mergedResponse = internalMerge(mergedResponse);
|
|
||||||
} else {
|
|
||||||
responseMessageBuilder.logBelowSuccessThreshold(
|
|
||||||
requestContext.getRequest().getSearchQuery(), numSuccessResponses, numPartitions,
|
|
||||||
successThreshold);
|
|
||||||
mergedResponse.setResponseCode(EarlybirdResponseCode.TOO_MANY_PARTITIONS_FAILED_ERROR);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
mergedResponse = accumulatedResponses.getMergedErrorResponse();
|
|
||||||
}
|
|
||||||
|
|
||||||
return mergedResponse;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Derive class should implement the logic to merge the specific type of results (recency,
|
|
||||||
* relevance, Top Tweets, etc..)
|
|
||||||
*/
|
|
||||||
protected abstract EarlybirdResponse internalMerge(EarlybirdResponse response);
|
|
||||||
|
|
||||||
protected abstract SearchTimerStats getMergedResponseTimer();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Do we have enough results so far that we can early terminate and not continue onto next tier?
|
|
||||||
*/
|
|
||||||
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
|
||||||
boolean foundEarlyTermination) {
|
|
||||||
// We are taking the most conservative tier response merging.
|
|
||||||
// This is the most conservative merge logic --- as long as we have some results, we should
|
|
||||||
// not return anything from the next tier. This may cause not ideal experience where a
|
|
||||||
// page is not full, but the use can still scroll further.
|
|
||||||
|
|
||||||
return foundEarlyTermination || totalResultsFromSuccessfulShards >= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void saveMergedDebugString(EarlybirdResponse mergedResponse) {
|
|
||||||
if (responseMessageBuilder.isDebugMode()) {
|
|
||||||
String message = responseMessageBuilder.debugString();
|
|
||||||
mergedResponse.setDebugString(message);
|
|
||||||
if (!accumulatedResponses.getSuccessResponses().isEmpty()
|
|
||||||
&& accumulatedResponses.getSuccessResponses().get(0).isSetDebugInfo()) {
|
|
||||||
|
|
||||||
EarlybirdDebugInfo debugInfo =
|
|
||||||
accumulatedResponses.getSuccessResponses().get(0).getDebugInfo();
|
|
||||||
mergedResponse.setDebugInfo(debugInfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private double getSuccessResponseThreshold() {
|
|
||||||
EarlybirdRequest request = requestContext.getRequest();
|
|
||||||
if (request.isSetSuccessfulResponseThreshold()) {
|
|
||||||
double successfulResponseThreshold = request.getSuccessfulResponseThreshold();
|
|
||||||
Preconditions.checkArgument(successfulResponseThreshold > 0,
|
|
||||||
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
|
|
||||||
Preconditions.checkArgument(successfulResponseThreshold <= 1.0,
|
|
||||||
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
|
|
||||||
return successfulResponseThreshold;
|
|
||||||
} else {
|
|
||||||
return getDefaultSuccessResponseThreshold();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected abstract double getDefaultSuccessResponseThreshold();
|
|
||||||
|
|
||||||
private static boolean checkSuccessPartitionRatio(
|
|
||||||
int numSuccessResponses,
|
|
||||||
int numPartitions,
|
|
||||||
double goodResponseThreshold) {
|
|
||||||
Preconditions.checkArgument(goodResponseThreshold > 0.0,
|
|
||||||
"Invalid goodResponseThreshold %s", goodResponseThreshold);
|
|
||||||
return numSuccessResponses >= (numPartitions * goodResponseThreshold);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merge hit counts from all results.
|
|
||||||
*/
|
|
||||||
protected Map<Long, Integer> aggregateHitCountMap() {
|
|
||||||
Map<Long, Integer> hitCounts = ResultsUtil
|
|
||||||
.aggregateCountMap(accumulatedResponses.getSuccessResponses(), HIT_COUNT_GETTER);
|
|
||||||
if (hitCounts.size() > 0) {
|
|
||||||
if (responseMessageBuilder.isDebugMode()) {
|
|
||||||
responseMessageBuilder.append("Hit counts:\n");
|
|
||||||
for (Map.Entry<Long, Integer> entry : hitCounts.entrySet()) {
|
|
||||||
responseMessageBuilder.append(String.format(" %10s seconds: %d hits\n",
|
|
||||||
entry.getKey() / 1000, entry.getValue()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hitCounts;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the number of results to keep as part of merge-collection.
|
|
||||||
*/
|
|
||||||
protected final int computeNumResultsToKeep() {
|
|
||||||
return EarlybirdResponseMergeUtil.computeNumResultsToKeep(requestContext.getRequest());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Remove exact duplicates (same id) from the result set.
|
|
||||||
*/
|
|
||||||
protected static void trimExactDups(ThriftSearchResults searchResults, TrimStats trimStats) {
|
|
||||||
int numResults = searchResults.getResultsSize();
|
|
||||||
List<ThriftSearchResult> oldResults = searchResults.getResults();
|
|
||||||
List<ThriftSearchResult> newResults = Lists.newArrayListWithCapacity(numResults);
|
|
||||||
HashSet<Long> resultSet = Sets.newHashSetWithExpectedSize(numResults);
|
|
||||||
|
|
||||||
for (ThriftSearchResult result : oldResults) {
|
|
||||||
if (resultSet.contains(result.getId())) {
|
|
||||||
trimStats.increaseRemovedDupsCount();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
newResults.add(result);
|
|
||||||
resultSet.add(result.getId());
|
|
||||||
}
|
|
||||||
|
|
||||||
searchResults.setResults(newResults);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected final int addResponsesToCollector(MultiwayMergeCollector collector) {
|
|
||||||
int totalResultSize = 0;
|
|
||||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
|
||||||
if (response.isSetSearchResults()) {
|
|
||||||
totalResultSize += response.getSearchResults().getResultsSize();
|
|
||||||
}
|
|
||||||
collector.addResponse(response);
|
|
||||||
}
|
|
||||||
return totalResultSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Given a sorted searchResults (for recency, sorted by ID; for relevance, sorted by score),
|
|
||||||
* returns the first 'computeNumResultsToKeep()' number of results.
|
|
||||||
*
|
|
||||||
* @param searchResults the searchResults to be truncated.
|
|
||||||
*/
|
|
||||||
protected final void truncateResults(ThriftSearchResults searchResults, TrimStats trimStats) {
|
|
||||||
int numResultsRequested = computeNumResultsToKeep();
|
|
||||||
|
|
||||||
int to = numResultsRequested == Integer.MAX_VALUE ? searchResults.getResultsSize()
|
|
||||||
: Math.min(numResultsRequested, searchResults.getResultsSize());
|
|
||||||
if (searchResults.getResultsSize() > to) {
|
|
||||||
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - to);
|
|
||||||
|
|
||||||
if (to > 0) {
|
|
||||||
searchResults.setResults(searchResults.getResults().subList(0, to));
|
|
||||||
} else {
|
|
||||||
// No more results for the next page
|
|
||||||
EARLYBIRD_RESPONSE_NO_MORE_RESULTS.increment();
|
|
||||||
searchResults.setResults(Collections.<ThriftSearchResult>emptyList());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
EarlybirdRequest getEarlybirdRequest() {
|
|
||||||
return requestContext.getRequest();
|
|
||||||
}
|
|
||||||
}
|
|
Binary file not shown.
@ -1,353 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import com.google.common.collect.Sets;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.twitter.search.common.logging.DebugMessageBuilder;
|
|
||||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
|
||||||
import com.twitter.search.common.ranking.thriftjava.ThriftFacetRankingOptions;
|
|
||||||
import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant;
|
|
||||||
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftFacetCount;
import com.twitter.search.earlybird.thrift.ThriftFacetCountMetadata;
import com.twitter.search.earlybird.thrift.ThriftFacetFieldResults;
import com.twitter.search.earlybird.thrift.ThriftFacetResults;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

/**
 * Merger class to merge facets EarlybirdResponse objects
 */
public class FacetResponseMerger extends EarlybirdResponseMerger {
  private static final Logger LOG = LoggerFactory.getLogger(FacetResponseMerger.class);

  private static final SearchTimerStats TIMER =
      SearchTimerStats.export("merge_facets", TimeUnit.NANOSECONDS, false, true);

  private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
  private final DebugMessageBuilder debugMessageBuilder;

  /**
   * Constructor to create the merger
   */
  public FacetResponseMerger(EarlybirdRequestContext requestContext,
      List<Future<EarlybirdResponse>> responses,
      ResponseAccumulator mode) {
    super(requestContext, responses, mode);
    debugMessageBuilder = responseMessageBuilder.getDebugMessageBuilder();
    debugMessageBuilder.verbose("--- Request Received: %s", requestContext.getRequest());
  }

  @Override
  protected SearchTimerStats getMergedResponseTimer() {
    return TIMER;
  }

  @Override
  protected double getDefaultSuccessResponseThreshold() {
    return SUCCESSFUL_RESPONSE_THRESHOLD;
  }

  @Override
  protected EarlybirdResponse internalMerge(EarlybirdResponse facetsResponse) {

    final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap =
        new HashMap<>();
    final Set<Long> userIDWhitelist = new HashSet<>();

    // First, parse the responses and build up our facet info map.
    boolean termStatsFilteringMode = FacetsResultsUtils.prepareFieldInfoMap(
        requestContext.getRequest().getFacetRequest(), facetFieldInfoMap);
    // Iterate through all futures and get results.
    collectResponsesAndPopulateMap(facetFieldInfoMap, userIDWhitelist);

    // Next, aggregate the top facets and update the blender response.
    facetsResponse
        .setFacetResults(new ThriftFacetResults()
            .setFacetFields(new HashMap<>())
            .setUserIDWhitelist(userIDWhitelist));

    // keep track of how many facets a user contributed - this map gets reset for every field
    Map<Long, Integer> perFieldAntiGamingMap = new HashMap<>();

    // this one is used for images and twimges
    Map<Long, Integer> imagesAntiGamingMap = new HashMap<>();

    Set<String> twimgDedupSet = null;

    for (final Map.Entry<String, FacetsResultsUtils.FacetFieldInfo> entry
        : facetFieldInfoMap.entrySet()) {
      // reset for each field
      String field = entry.getKey();
      final Map<Long, Integer> antiGamingMap;
      if (field.equals(EarlybirdFieldConstant.IMAGES_FACET)
          || field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
        antiGamingMap = imagesAntiGamingMap;
      } else {
        perFieldAntiGamingMap.clear();
        antiGamingMap = perFieldAntiGamingMap;
      }

      ThriftFacetFieldResults results = new ThriftFacetFieldResults();
      FacetsResultsUtils.FacetFieldInfo info = entry.getValue();
      results.setTotalCount(info.totalCounts);
      results.setTopFacets(new ArrayList<>());
      FacetsResultsUtils.fillTopLanguages(info, results);
      if (info.topFacets != null && !info.topFacets.isEmpty()) {
        fillFacetFieldResults(info, antiGamingMap, results);
      }

      if (field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
        if (twimgDedupSet == null) {
          twimgDedupSet = Sets.newHashSet();
        }
        FacetsResultsUtils.dedupTwimgFacet(twimgDedupSet, results, debugMessageBuilder);
      }

      facetsResponse.getFacetResults().putToFacetFields(entry.getKey(), results);
    }

    if (!termStatsFilteringMode) {
      // in term stats filtering mode, doing this here would break term stats filtering
      FacetsResultsUtils.mergeTwimgResults(
          facetsResponse.getFacetResults(),
          Collections.<ThriftFacetCount>reverseOrder(
              FacetsResultsUtils.getFacetCountComparator(
                  requestContext.getRequest().getFacetRequest())));
    }

    // Update the numHitsProcessed on ThriftSearchResults.
    int numHitsProcessed = 0;
    int numPartitionsEarlyTerminated = 0;
    for (EarlybirdResponse earlybirdResponse : accumulatedResponses.getSuccessResponses()) {
      ThriftSearchResults searchResults = earlybirdResponse.getSearchResults();
      if (searchResults != null) {
        numHitsProcessed += searchResults.getNumHitsProcessed();
        numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
      }
    }
    ThriftSearchResults searchResults = new ThriftSearchResults();
    searchResults.setResults(new ArrayList<>()); // required field
    searchResults.setNumHitsProcessed(numHitsProcessed);
    searchResults.setNumPartitionsEarlyTerminated(numPartitionsEarlyTerminated);
    facetsResponse.setSearchResults(searchResults);

    LOG.debug("Facets call completed successfully: {}", facetsResponse);

    FacetsResultsUtils.fixNativePhotoUrl(facetsResponse);
    return facetsResponse;
  }

  private void fillFacetFieldResults(FacetsResultsUtils.FacetFieldInfo facetFieldInfo,
      Map<Long, Integer> antiGamingMap,
      ThriftFacetFieldResults results) {
    int minWeightedCount = 0;
    int minSimpleCount = 0;
    int maxPenaltyCount = Integer.MAX_VALUE;
    double maxPenaltyCountRatio = 1;
    boolean excludePossiblySensitiveFacets = false;
    boolean onlyReturnFacetsWithDisplayTweet = false;
    int maxHitsPerUser = -1;

    EarlybirdRequest request = requestContext.getRequest();
    if (request.getFacetRequest() != null) {
      ThriftFacetRankingOptions rankingOptions = request.getFacetRequest().getFacetRankingOptions();

      if (request.getSearchQuery() != null) {
        maxHitsPerUser = request.getSearchQuery().getMaxHitsPerUser();
      }

      if (rankingOptions != null) {
        LOG.debug("FacetsResponseMerger: Using rankingOptions={}", rankingOptions);

        if (rankingOptions.isSetMinCount()) {
          minWeightedCount = rankingOptions.getMinCount();
        }
        if (rankingOptions.isSetMinSimpleCount()) {
          minSimpleCount = rankingOptions.getMinSimpleCount();
        }
        if (rankingOptions.isSetMaxPenaltyCount()) {
          maxPenaltyCount = rankingOptions.getMaxPenaltyCount();
        }
        if (rankingOptions.isSetMaxPenaltyCountRatio()) {
          maxPenaltyCountRatio = rankingOptions.getMaxPenaltyCountRatio();
        }
        if (rankingOptions.isSetExcludePossiblySensitiveFacets()) {
          excludePossiblySensitiveFacets = rankingOptions.isExcludePossiblySensitiveFacets();
        }
        if (rankingOptions.isSetOnlyReturnFacetsWithDisplayTweet()) {
          onlyReturnFacetsWithDisplayTweet = rankingOptions.isOnlyReturnFacetsWithDisplayTweet();
        }
      }
    } else {
      LOG.warn("earlybirdRequest.getFacetRequest() is null");
    }

    ThriftFacetCount[] topFacetsArray = new ThriftFacetCount[facetFieldInfo.topFacets.size()];

    facetFieldInfo.topFacets.values().toArray(topFacetsArray);
    Arrays.sort(topFacetsArray, Collections.<ThriftFacetCount>reverseOrder(
        FacetsResultsUtils.getFacetCountComparator(request.getFacetRequest())));

    int numResults = capFacetFieldWidth(facetFieldInfo.fieldRequest.numResults);

    if (topFacetsArray.length < numResults) {
      numResults = topFacetsArray.length;
    }

    int collected = 0;
    for (int i = 0; i < topFacetsArray.length; ++i) {
      ThriftFacetCount count = topFacetsArray[i];

      if (onlyReturnFacetsWithDisplayTweet
          && (!count.isSetMetadata() || !count.getMetadata().isSetStatusId()
          || count.getMetadata().getStatusId() == -1)) {
        // status id must be set
        continue;
      }

      if (excludePossiblySensitiveFacets && count.isSetMetadata()
          && count.getMetadata().isStatusPossiblySensitive()) {
        // the display tweet may be offensive or NSFW
        if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
          debugMessageBuilder.verbose2("[%d] FacetsResponseMerger EXCLUDED: offensive or NSFW %s, "
                  + "explanation: %s",
              i, facetCountSummary(count),
              count.getMetadata().getExplanation());
        }
        continue;
      }

      boolean filterOutUser = false;
      if (maxHitsPerUser != -1 && count.isSetMetadata()) {
        ThriftFacetCountMetadata metadata = count.getMetadata();
        if (!metadata.dontFilterUser) {
          long twitterUserId = metadata.getTwitterUserId();
          int numResultsFromUser = 1;
          if (twitterUserId != -1) {
            Integer perUser = antiGamingMap.get(twitterUserId);
            if (perUser != null) {
              numResultsFromUser = perUser + 1;
              filterOutUser = numResultsFromUser > maxHitsPerUser;
            }
            antiGamingMap.put(twitterUserId, numResultsFromUser);
          }
        }
      }

      // Filter out facets that don't meet the basic criteria.
      if (count.getSimpleCount() < minSimpleCount) {
        if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
          debugMessageBuilder.verbose2(
              "[%d] FacetsResponseMerger EXCLUDED: simpleCount:%d < minSimpleCount:%d, %s",
              i, count.getSimpleCount(), minSimpleCount, facetCountSummary(count));
        }
        continue;
      }
      if (count.getWeightedCount() < minWeightedCount) {
        if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
          debugMessageBuilder.verbose2(
              "[%d] FacetsResponseMerger EXCLUDED: weightedCount:%d < minWeightedCount:%d, %s",
              i, count.getWeightedCount(), minWeightedCount, facetCountSummary(count));
        }
        continue;
      }
      if (filterOutUser) {
        if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
          debugMessageBuilder.verbose2(
              "[%d] FacetsResponseMerger EXCLUDED: antiGaming filtered user: %d: %s",
              i, count.getMetadata().getTwitterUserId(), facetCountSummary(count));
        }
        continue;
      }
      if (count.getPenaltyCount() > maxPenaltyCount) {
        if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
          debugMessageBuilder.verbose2(
              "[%d] FacetsResponseMerger EXCLUDED: penaltyCount:%.3f > maxPenaltyCount:%.3f, %s",
              i, count.getPenaltyCount(), maxPenaltyCount, facetCountSummary(count));
        }
        continue;
      }
      if (((double) count.getPenaltyCount() / count.getSimpleCount()) > maxPenaltyCountRatio) {
        if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
          debugMessageBuilder.verbose2(
              "[%d] FacetsResponseMerger EXCLUDED: penaltyCountRatio: %.3f > "
                  + "maxPenaltyCountRatio:%.3f, %s",
              i, (double) count.getPenaltyCount() / count.getSimpleCount(), maxPenaltyCountRatio,
              facetCountSummary(count));
        }
        continue;
      }
      results.addToTopFacets(count);

      collected++;
      if (collected >= numResults) {
        break;
      }
    }
  }

  private static int capFacetFieldWidth(int numResults) {
    int ret = numResults;
    if (numResults <= 0) {
      // this in theory should not be allowed, but for now we proceed with a default length
      ret = 10; // default to 10 for future merge code to terminate correctly
    }
    if (numResults >= 100) {
      ret = 100;
    }
    return ret;
  }

  private static String facetCountSummary(final ThriftFacetCount count) {
    if (count.isSetMetadata()) {
      return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f, sid:%d (%s))",
          count.getFacetLabel(), count.getSimpleCount(), count.getWeightedCount(),
          count.getPenaltyCount(), count.getScore(), count.getMetadata().getStatusId(),
          count.getMetadata().getStatusLanguage());
    } else {
      return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f)", count.getFacetLabel(),
          count.getSimpleCount(), count.getWeightedCount(), count.getPenaltyCount(),
          count.getScore());
    }
  }

  // Iterate through the backend responses and fill up the FacetFieldInfo map.
  private void collectResponsesAndPopulateMap(
      final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap,
      final Set<Long> userIDWhitelist) {
    // Next, iterate through the backend responses.
    int i = 0;
    for (EarlybirdResponse facetsResponse : accumulatedResponses.getSuccessResponses()) {
      if (facetsResponse.isSetFacetResults()) {
        LOG.debug("Facet response from earlybird {} is {} ", i, facetsResponse.getFacetResults());
        i++;
        ThriftFacetResults facetResults = facetsResponse.getFacetResults();
        if (facetResults.isSetUserIDWhitelist()) {
          userIDWhitelist.addAll(facetResults.getUserIDWhitelist());
        }
        FacetsResultsUtils.fillFacetFieldInfo(
            facetResults, facetFieldInfoMap,
            userIDWhitelist);
      }
    }
    LOG.debug("Earlybird facet response total size {}", i);
  }
}
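The per-user anti-gaming cap in fillFacetFieldResults above is easiest to see in isolation. Below is a minimal, self-contained sketch that is not part of the original file; the class and method names are illustrative only. It shows the same counting idea under the assumption that every facet attributed to a user bumps a per-user counter, and a user's further facets are skipped once maxHitsPerUser is exceeded.

// Illustrative sketch only (hypothetical names): mirrors the anti-gaming counting above.
import java.util.HashMap;
import java.util.Map;

final class AntiGamingSketch {
  // Returns true when this user's facet should be filtered out; also records the hit.
  static boolean shouldFilter(Map<Long, Integer> perUserCounts, long userId, int maxHitsPerUser) {
    if (maxHitsPerUser == -1 || userId == -1) {
      return false; // filtering disabled or user unknown
    }
    int seen = perUserCounts.getOrDefault(userId, 0) + 1;
    perUserCounts.put(userId, seen);
    return seen > maxHitsPerUser;
  }

  public static void main(String[] args) {
    Map<Long, Integer> counts = new HashMap<>();
    for (int i = 0; i < 4; i++) {
      System.out.println(shouldFilter(counts, 42L, 2)); // false, false, true, true
    }
  }
}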
Binary file not shown.
@ -1,44 +0,0 @@
package com.twitter.search.earlybird_root.mergers;

import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;

public final class PartitionResponseAccumulator extends ResponseAccumulator {
  private static final String TARGET_TYPE_PARTITION = "partition";

  @Override
  public String getNameForLogging(int responseIndex, int numTotalResponses) {
    return TARGET_TYPE_PARTITION + responseIndex;
  }

  @Override
  public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
    // We do not need to differentiate between partitions: we just want to get the number of
    // responses returned by Earlybirds, for each EarlybirdResponseCode.
    return TARGET_TYPE_PARTITION;
  }

  @Override
  boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
    return false;
  }

  @Override
  public void handleSkippedResponse(EarlybirdResponseCode responseCode) { }

  @Override
  public void handleErrorResponse(EarlybirdResponse response) {
  }

  @Override
  public AccumulatedResponses.PartitionCounts getPartitionCounts() {
    return new AccumulatedResponses.PartitionCounts(getNumResponses(),
        getSuccessResponses().size() + getSuccessfulEmptyResponseCount(), null);
  }

  @Override
  protected boolean isMergingAcrossTiers() {
    return false;
  }
}
Binary file not shown.
@ -1,638 +0,0 @@
package com.twitter.search.earlybird_root.mergers;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.common.relevance.utils.ResultComparators;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RecencyMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
    .EarlyTerminationTrimmingStats.Type.ALREADY_EARLY_TERMINATED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
    .EarlyTerminationTrimmingStats.Type.FILTERED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
    .EarlyTerminationTrimmingStats.Type.FILTERED_AND_TRUNCATED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
    .EarlyTerminationTrimmingStats.Type.NOT_EARLY_TERMINATED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
    .EarlyTerminationTrimmingStats.Type.TERMINATED_GOT_EXACT_NUM_RESULTS;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
    .EarlyTerminationTrimmingStats.Type.TRUNCATED;

/**
 * Merger class to merge recency search EarlybirdResponse objects.
 */
public class RecencyResponseMerger extends EarlybirdResponseMerger {
  private static final Logger LOG = LoggerFactory.getLogger(RecencyResponseMerger.class);

  private static final SearchTimerStats RECENCY_TIMER =
      SearchTimerStats.export("merge_recency", TimeUnit.NANOSECONDS, false, true);

  @VisibleForTesting
  static final String TERMINATED_COLLECTED_ENOUGH_RESULTS =
      "terminated_collected_enough_results";

  // Allowed replication lag relative to all replicas. Replication lag exceeding
  // this amount may result in some tweets from the replica not being returned in search.
  private static final long ALLOWED_REPLICATION_LAG_MS = 10000;

  private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;

  @VisibleForTesting
  static final SearchCounter RECENCY_ZERO_RESULT_COUNT_AFTER_FILTERING_MAX_MIN_IDS =
      SearchCounter.export("merger_recency_zero_result_count_after_filtering_max_min_ids");

  @VisibleForTesting
  static final SearchCounter RECENCY_TRIMMED_TOO_MANY_RESULTS_COUNT =
      SearchCounter.export("merger_recency_trimmed_too_many_results_count");

  private static final SearchCounter RECENCY_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS =
      SearchCounter.export("merger_recency_tier_merge_early_terminated_with_not_enough_results");

  private static final SearchCounter RECENCY_CLEARED_EARLY_TERMINATION_COUNT =
      SearchCounter.export("merger_recency_cleared_early_termination_count");

  /**
   * Results were truncated because merged results exceeded the requested numResults.
   */
  @VisibleForTesting
  static final String MERGING_EARLY_TERMINATION_REASON_TRUNCATED =
      "root_merging_truncated_results";

  /**
   * Results smaller than the merged minSearchedStatusId were filtered out.
   */
  @VisibleForTesting
  static final String MERGING_EARLY_TERMINATION_REASON_FILTERED =
      "root_merging_filtered_results";

  @VisibleForTesting
  static final EarlyTerminationTrimmingStats PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
      new EarlyTerminationTrimmingStats("recency_partition_merging");

  @VisibleForTesting
  static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
      new EarlyTerminationTrimmingStats("recency_tier_merging");

  @VisibleForTesting
  static class EarlyTerminationTrimmingStats {

    enum Type {
      /**
       * The whole result was not terminated at all.
       */
      NOT_EARLY_TERMINATED,
      /**
       * Was terminated before we did any trimming.
       */
      ALREADY_EARLY_TERMINATED,
      /**
       * Was not terminated when merged, but results were filtered due to min/max ranges.
       */
      FILTERED,
      /**
       * Was not terminated when merged, but results were truncated.
       */
      TRUNCATED,
      /**
       * Was not terminated when merged, but results were filtered due to min/max ranges and
       * truncated.
       */
      FILTERED_AND_TRUNCATED,
      /**
       * When the search asks for X results, and we get exactly X results back, without trimming
       * or truncating on the tail side (min_id side), we still mark the search as early terminated.
       * This is because later tiers possibly have more results.
       */
      TERMINATED_GOT_EXACT_NUM_RESULTS,
    }

    /**
     * A counter tracking merged responses for each {@link EarlyTerminationTrimmingStats.Type}
     * defined above.
     */
    private final ImmutableMap<Type, SearchCounter> searchCounterMap;

    EarlyTerminationTrimmingStats(String prefix) {
      Map<Type, SearchCounter> tempMap = Maps.newEnumMap(Type.class);

      tempMap.put(NOT_EARLY_TERMINATED,
          SearchCounter.export(prefix + "_not_early_terminated_after_merging"));
      tempMap.put(ALREADY_EARLY_TERMINATED,
          SearchCounter.export(prefix + "_early_terminated_before_merge_trimming"));
      tempMap.put(TRUNCATED,
          SearchCounter.export(prefix + "_early_terminated_after_merging_truncated"));
      tempMap.put(FILTERED,
          SearchCounter.export(prefix + "_early_terminated_after_merging_filtered"));
      tempMap.put(FILTERED_AND_TRUNCATED,
          SearchCounter.export(prefix + "_early_terminated_after_merging_filtered_and_truncated"));
      tempMap.put(TERMINATED_GOT_EXACT_NUM_RESULTS,
          SearchCounter.export(prefix + "_early_terminated_after_merging_got_exact_num_results"));

      searchCounterMap = Maps.immutableEnumMap(tempMap);
    }

    public SearchCounter getCounterFor(Type type) {
      return searchCounterMap.get(type);
    }
  }

  private final EarlybirdFeatureSchemaMerger featureSchemaMerger;

  public RecencyResponseMerger(EarlybirdRequestContext requestContext,
      List<Future<EarlybirdResponse>> responses,
      ResponseAccumulator mode,
      EarlybirdFeatureSchemaMerger featureSchemaMerger) {
    super(requestContext, responses, mode);
    this.featureSchemaMerger = featureSchemaMerger;
  }

  @Override
  protected double getDefaultSuccessResponseThreshold() {
    return SUCCESSFUL_RESPONSE_THRESHOLD;
  }

  @Override
  protected SearchTimerStats getMergedResponseTimer() {
    return RECENCY_TIMER;
  }

  @Override
  protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
    // The merged maxSearchedStatusId and minSearchedStatusId
    long maxId = findMaxFullySearchedStatusID();
    long minId = findMinFullySearchedStatusID();

    RecencyMergeCollector collector = new RecencyMergeCollector(responses.size());
    int totalResultSize = addResponsesToCollector(collector);
    ThriftSearchResults searchResults = collector.getAllSearchResults();

    TrimStats trimStats = trimResults(searchResults, minId, maxId);
    setMergedMaxSearchedStatusId(searchResults, maxId);
    setMergedMinSearchedStatusId(
        searchResults, minId, trimStats.getResultsTruncatedFromTailCount() > 0);

    mergedResponse.setSearchResults(searchResults);

    // Override some components of the response as appropriate to real-time.
    searchResults.setHitCounts(aggregateHitCountMap());
    if (accumulatedResponses.isMergingPartitionsWithinATier()
        && clearEarlyTerminationIfReachingTierBottom(mergedResponse)) {
      RECENCY_CLEARED_EARLY_TERMINATION_COUNT.increment();
    } else {
      setEarlyTerminationForTrimmedResults(mergedResponse, trimStats);
    }

    responseMessageBuilder.debugVerbose("Hits: %s %s", totalResultSize, trimStats);
    responseMessageBuilder.debugVerbose(
        "Hash Partitioned Earlybird call completed successfully: %s", mergedResponse);

    featureSchemaMerger.collectAndSetFeatureSchemaInResponse(
        searchResults,
        requestContext,
        "merger_recency_tier",
        accumulatedResponses.getSuccessResponses());

    return mergedResponse;
  }

  /**
   * When we reach the tier bottom, pagination can stop working even though we haven't got
   * all results. e.g.
   * Results from partition 1: [101 91 81], minSearchedStatusId is 81
   * Results from partition 2: [102 92], minSearchedStatusId is 92, not early terminated.
   *
   * After merge, we get [102, 101, 92], with minResultId == 92. Since results from
   * partition 2 are not early terminated, 92 is the tier bottom here. Since results are
   * filtered, early termination for the merged result is set to true, so blender will call again,
   * with maxDocId == 91. This time we get results:
   * Results from partition 1: [91 81], minSearchedStatusId is 81
   * Results from partition 2: [], minSearchedStatusId is still 92
   * After merge we get [] and minSearchedStatusId is still 92. No progress can be made on
   * pagination and clients get stuck.
   *
   * So in this case, we clear the early termination flag to tell blender there are no more
   * results in this tier. Tweets below the tier bottom will be missed, but that also happens
   * without this step, as the next pagination call will return empty results anyway.
   * So even if there is no overlap between tiers, this is still better.
   *
   * Return true if early termination is cleared due to this, otherwise return false.
   * To be safe, we do nothing here to keep existing behavior and only override it in
   * StrictRecencyResponseMerger.
   */
  protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
    return false;
  }

  /**
   * Determines if the merged response should be early-terminated when it has exactly as many
   * trimmed results as requested, and is not early-terminated for other reasons.
   */
  protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
    return true;
  }

  /**
   * If the end results were trimmed in any way, reflect that in the response as a query that was
   * early terminated. A response can be either (1) truncated because we merged more results than
   * what was asked for with numResults, or (2) we filtered results that were smaller than the
   * merged minSearchedStatusId.
   *
   * @param mergedResponse the merged response.
   * @param trimStats trim stats for this merge.
   */
  private void setEarlyTerminationForTrimmedResults(
      EarlybirdResponse mergedResponse,
      TrimStats trimStats) {

    responseMessageBuilder.debugVerbose("Checking for merge trimming, trimStats %s", trimStats);

    EarlyTerminationTrimmingStats stats = getEarlyTerminationTrimmingStats();

    EarlyTerminationInfo earlyTerminationInfo = mergedResponse.getEarlyTerminationInfo();
    Preconditions.checkNotNull(earlyTerminationInfo);

    if (!earlyTerminationInfo.isEarlyTerminated()) {
      if (trimStats.getMinIdFilterCount() > 0 || trimStats.getResultsTruncatedFromTailCount() > 0) {
        responseMessageBuilder.debugVerbose("Setting early termination, trimStats: %s, results: %s",
            trimStats, mergedResponse);

        earlyTerminationInfo.setEarlyTerminated(true);
        addEarlyTerminationReasons(earlyTerminationInfo, trimStats);

        if (trimStats.getMinIdFilterCount() > 0
            && trimStats.getResultsTruncatedFromTailCount() > 0) {
          stats.getCounterFor(FILTERED_AND_TRUNCATED).increment();
        } else if (trimStats.getMinIdFilterCount() > 0) {
          stats.getCounterFor(FILTERED).increment();
        } else if (trimStats.getResultsTruncatedFromTailCount() > 0) {
          stats.getCounterFor(TRUNCATED).increment();
        } else {
          Preconditions.checkState(false, "Invalid TrimStats: %s", trimStats);
        }
      } else if ((computeNumResultsToKeep() == mergedResponse.getSearchResults().getResultsSize())
          && shouldEarlyTerminateWhenEnoughTrimmedResults()) {
        earlyTerminationInfo.setEarlyTerminated(true);
        earlyTerminationInfo.addToMergedEarlyTerminationReasons(
            TERMINATED_COLLECTED_ENOUGH_RESULTS);
        stats.getCounterFor(TERMINATED_GOT_EXACT_NUM_RESULTS).increment();
      } else {
        stats.getCounterFor(NOT_EARLY_TERMINATED).increment();
      }
    } else {
      stats.getCounterFor(ALREADY_EARLY_TERMINATED).increment();
      // Even if the results were already marked as early terminated, we can add additional
      // reasons for debugging (if the merged results were filtered or truncated).
      addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
    }
  }

  private void addEarlyTerminationReasons(
      EarlyTerminationInfo earlyTerminationInfo,
      TrimStats trimStats) {

    if (trimStats.getMinIdFilterCount() > 0) {
      earlyTerminationInfo.addToMergedEarlyTerminationReasons(
          MERGING_EARLY_TERMINATION_REASON_FILTERED);
    }

    if (trimStats.getResultsTruncatedFromTailCount() > 0) {
      earlyTerminationInfo.addToMergedEarlyTerminationReasons(
          MERGING_EARLY_TERMINATION_REASON_TRUNCATED);
    }
  }

  private EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStats() {
    if (accumulatedResponses.isMergingPartitionsWithinATier()) {
      return getEarlyTerminationTrimmingStatsForPartitions();
    } else {
      return getEarlyTerminationTrimmingStatsForTiers();
    }
  }

  protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
    return PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
  }

  protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
    return TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
  }

  /**
   * If we get enough results, no need to go on.
   * If one of the partitions early terminated, we can't go on or else there could be a gap.
   */
  @Override
  public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
      boolean foundEarlyTermination) {

    int resultsRequested = computeNumResultsToKeep();

    boolean shouldEarlyTerminate = foundEarlyTermination
        || totalResultsFromSuccessfulShards >= resultsRequested;

    if (shouldEarlyTerminate && totalResultsFromSuccessfulShards < resultsRequested) {
      RECENCY_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS.increment();
    }

    return shouldEarlyTerminate;
  }

  /**
   * Find the min status id that has been _completely_ searched across all partitions. The
   * largest min status id across all partitions.
   *
   * @return the min searched status id found
   */
  protected long findMinFullySearchedStatusID() {
    List<Long> minIds = accumulatedResponses.getMinIds();
    if (minIds.isEmpty()) {
      return Long.MIN_VALUE;
    }

    if (accumulatedResponses.isMergingPartitionsWithinATier()) {
      // When merging partitions, the min ID should be the largest among the min IDs.
      return Collections.max(accumulatedResponses.getMinIds());
    } else {
      // When merging tiers, the min ID should be the smallest among the min IDs.
      return Collections.min(accumulatedResponses.getMinIds());
    }
  }

  /**
   * Find the max status id that has been _completely_ searched across all partitions. The
   * smallest max status id across all partitions.
   *
   * This is where we reconcile replication lag by selecting the oldest maxid from the
   * partitions searched.
   *
   * @return the max searched status id found
   */
  protected long findMaxFullySearchedStatusID() {
    List<Long> maxIDs = accumulatedResponses.getMaxIds();
    if (maxIDs.isEmpty()) {
      return Long.MAX_VALUE;
    }
    Collections.sort(maxIDs);

    final long newest = maxIDs.get(maxIDs.size() - 1);
    final long newestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(newest);

    for (int i = 0; i < maxIDs.size(); i++) {
      long oldest = maxIDs.get(i);
      long oldestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(oldest);
      long deltaMs = newestTimestamp - oldestTimestamp;

      if (i == 0) {
        LOG.debug("Max delta is {}", deltaMs);
      }

      if (deltaMs < ALLOWED_REPLICATION_LAG_MS) {
        if (i != 0) {
          LOG.debug("{} partition replicas lagging more than {} ms", i, ALLOWED_REPLICATION_LAG_MS);
        }
        return oldest;
      }
    }

    // Can't get here - by this point oldest == newest, and delta is 0.
    return newest;
  }

  /**
   * Trim the ThriftSearchResults if we have enough results, to return the first
   * 'computeNumResultsToKeep()' number of results.
   *
   * If we don't have enough results after trimming, this function will first try to back fill
   * older results, then newer results.
   *
   * @param searchResults ThriftSearchResults that hold the to be trimmed List<ThriftSearchResult>
   * @return TrimStats containing statistics about how many results were removed
   */
  protected TrimStats trimResults(
      ThriftSearchResults searchResults,
      long mergedMin,
      long mergedMax) {
    if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
      // no results, no trimming needed
      return TrimStats.EMPTY_STATS;
    }

    if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
      // Not a normal search, no trimming needed
      return TrimStats.EMPTY_STATS;
    }

    TrimStats trimStats = new TrimStats();
    trimExactDups(searchResults, trimStats);

    int numResultsRequested = computeNumResultsToKeep();
    if (shouldSkipTrimmingWhenNotEnoughResults(searchResults, numResultsRequested)) {
      //////////////////////////////////////////////////////////
      // We don't have enough results, let's not do trimming
      //////////////////////////////////////////////////////////
      return trimStats;
    }

    if (accumulatedResponses.isMergingPartitionsWithinATier()) {
      trimResultsBasedSearchedRange(
          searchResults, trimStats, numResultsRequested, mergedMin, mergedMax);
    }

    // Respect "computeNumResultsToKeep()" here, only keep "computeNumResultsToKeep()" results.
    truncateResults(searchResults, trimStats);

    return trimStats;
  }

  /**
   * When there are not enough results, we don't remove results based on the searched range.
   * This has a tradeoff: with this, we don't reduce our recall when we already don't have enough
   * results. However, with this, we can lose results while paginating because we return results
   * outside of the valid searched range.
   */
  protected boolean shouldSkipTrimmingWhenNotEnoughResults(
      ThriftSearchResults searchResults, int numResultsRequested) {
    return searchResults.getResultsSize() <= numResultsRequested;
  }

  /**
   * Trim results based on search range. The search range [x, y] is determined by:
   * x is the maximum of the minimum search IDs;
   * y is the minimum of the maximum search IDs.
   *
   * IDs outside of this range are removed.
   * If we do not get enough results after the removal, we add IDs back until we get enough results.
   * We first add back IDs from the older side. If there are still not enough results,
   * we start adding back IDs from the newer side.
   */
  private void trimResultsBasedSearchedRange(ThriftSearchResults searchResults,
      TrimStats trimStats,
      int numResultsRequested,
      long mergedMin,
      long mergedMax) {
    ///////////////////////////////////////////////////////////////////
    // we have more results than requested, let's do some trimming
    ///////////////////////////////////////////////////////////////////

    // Save the original results before trimming
    List<ThriftSearchResult> originalResults = searchResults.getResults();

    filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);

    // This does happen. It is hard to say what we should do here, so we just return the
    // original results.
    if (searchResults.getResultsSize() == 0) {
      RECENCY_ZERO_RESULT_COUNT_AFTER_FILTERING_MAX_MIN_IDS.increment();
      searchResults.setResults(originalResults);

      // Clean up min/max filtered count, since we're bringing back whatever we just filtered.
      trimStats.clearMaxIdFilterCount();
      trimStats.clearMinIdFilterCount();

      if (LOG.isDebugEnabled() || responseMessageBuilder.isDebugMode()) {
        String errMsg = "No trimming is done as filtered results are empty. "
            + "maxId=" + mergedMax + ",minId=" + mergedMin;
        LOG.debug(errMsg);
        responseMessageBuilder.append(errMsg + "\n");
      }
    } else {
      // oops! we're trimming too many results. Let's put some back
      if (searchResults.getResultsSize() < numResultsRequested) {
        RECENCY_TRIMMED_TOO_MANY_RESULTS_COUNT.increment();

        List<ThriftSearchResult> trimmedResults = searchResults.getResults();
        long firstTrimmedResultId = trimmedResults.get(0).getId();
        long lastTrimmedResultId = trimmedResults.get(trimmedResults.size() - 1).getId();

        // First, try to back fill with older results
        int i = 0;
        for (; i < originalResults.size(); ++i) {
          ThriftSearchResult result = originalResults.get(i);
          if (result.getId() < lastTrimmedResultId) {
            trimmedResults.add(result);
            trimStats.decreaseMinIdFilterCount();
            if (trimmedResults.size() >= numResultsRequested) {
              break;
            }
          }
        }

        // still not enough results? back fill with newer results
        // find the oldest of the newer results
        if (trimmedResults.size() < numResultsRequested) {
          // still not enough results? back fill with newer results
          // find the oldest of the newer results
          for (i = originalResults.size() - 1; i >= 0; --i) {
            ThriftSearchResult result = originalResults.get(i);
            if (result.getId() > firstTrimmedResultId) {
              trimmedResults.add(result);
              trimStats.decreaseMaxIdFilterCount();
              if (trimmedResults.size() >= numResultsRequested) {
                break;
              }
            }
          }

          // newer results were added to the back of the list, re-sort
          Collections.sort(trimmedResults, ResultComparators.ID_COMPARATOR);
        }
      }
    }
  }

  protected void setMergedMinSearchedStatusId(
      ThriftSearchResults searchResults,
      long currentMergedMin,
      boolean resultsWereTrimmed) {
    if (accumulatedResponses.getMinIds().isEmpty()) {
      return;
    }

    long merged;
    if (searchResults == null
        || !searchResults.isSetResults()
        || searchResults.getResultsSize() == 0) {
      merged = currentMergedMin;
    } else {
      List<ThriftSearchResult> results = searchResults.getResults();
      long firstResultId = results.get(0).getId();
      long lastResultId = results.get(results.size() - 1).getId();
      merged = Math.min(firstResultId, lastResultId);
      if (!resultsWereTrimmed) {
        // If the results were trimmed, we want to set minSearchedStatusID to the smallest
        // tweet ID in the response. Otherwise, we want to take the min between that, and
        // the current minSearchedStatusID.
        merged = Math.min(merged, currentMergedMin);
      }
    }

    searchResults.setMinSearchedStatusID(merged);
  }

  private void setMergedMaxSearchedStatusId(
      ThriftSearchResults searchResults,
      long currentMergedMax) {
    if (accumulatedResponses.getMaxIds().isEmpty()) {
      return;
    }

    long merged;
    if (searchResults == null
        || !searchResults.isSetResults()
        || searchResults.getResultsSize() == 0) {
      merged = currentMergedMax;
    } else {
      List<ThriftSearchResult> results = searchResults.getResults();
      long firstResultId = results.get(0).getId();
      long lastResultId = results.get(results.size() - 1).getId();
      long maxResultId = Math.max(firstResultId, lastResultId);
      merged = Math.max(maxResultId, currentMergedMax);
    }

    searchResults.setMaxSearchedStatusID(merged);
  }

  protected static void filterResultsByMergedMinMaxIds(
      ThriftSearchResults results, long maxStatusId, long minStatusId, TrimStats trimStats) {
    List<ThriftSearchResult> trimmedResults =
        Lists.newArrayListWithCapacity(results.getResultsSize());

    for (ThriftSearchResult result : results.getResults()) {
      long statusId = result.getId();

      if (statusId > maxStatusId) {
        trimStats.increaseMaxIdFilterCount();
      } else if (statusId < minStatusId) {
        trimStats.increaseMinIdFilterCount();
      } else {
        trimmedResults.add(result);
      }
    }

    results.setResults(trimmedResults);
  }
}
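The merged searched range logic above (the largest per-partition minSearchedStatusID, and the smallest maxSearchedStatusID that is within the allowed replication lag of the newest partition) can be sketched on its own. The following is an illustrative approximation, not the original class: SnowflakeIdParser is replaced by a caller-supplied timestamp function, and all names are hypothetical.

// Illustrative sketch only: assumed stand-ins for the partition-merge range computation above.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.LongUnaryOperator;

final class SearchedRangeSketch {
  static final long ALLOWED_REPLICATION_LAG_MS = 10_000L;

  // When merging partitions, the fully-searched lower bound is the largest of the minimums.
  static long mergedMinId(List<Long> minIds) {
    return minIds.isEmpty() ? Long.MIN_VALUE : Collections.max(minIds);
  }

  // timestampOf stands in for a Snowflake-ID-to-timestamp parser.
  static long mergedMaxId(List<Long> maxIds, LongUnaryOperator timestampOf) {
    if (maxIds.isEmpty()) {
      return Long.MAX_VALUE;
    }
    List<Long> sorted = new ArrayList<>(maxIds);
    Collections.sort(sorted);
    long newestTs = timestampOf.applyAsLong(sorted.get(sorted.size() - 1));
    for (long candidate : sorted) {
      if (newestTs - timestampOf.applyAsLong(candidate) < ALLOWED_REPLICATION_LAG_MS) {
        return candidate; // oldest max that is within the allowed lag of the newest
      }
    }
    return sorted.get(sorted.size() - 1);
  }
}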
Binary file not shown.
@ -1,268 +0,0 @@
package com.twitter.search.earlybird_root.mergers;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.common.util.earlybird.ResultsUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;

/**
 * Merger class to merge relevance search EarlybirdResponse objects
 */
public class RelevanceResponseMerger extends EarlybirdResponseMerger {
  private static final Logger LOG = LoggerFactory.getLogger(RelevanceResponseMerger.class);

  private static final SearchTimerStats TIMER =
      SearchTimerStats.export("merge_relevance", TimeUnit.NANOSECONDS, false, true);

  private static final SearchCounter RELEVANCE_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS =
      SearchCounter.export("merger_relevance_tier_merge_early_terminated_with_not_enough_results");

  private static final String PARTITION_NUM_RESULTS_COUNTER_SKIP_STATS =
      "merger_relevance_post_trimmed_results_skip_stat_tier_%s_partition_%d";

  @VisibleForTesting
  public static final String PARTITION_NUM_RESULTS_COUNTER_NAME_FORMAT =
      "merger_relevance_post_trimmed_results_from_tier_%s_partition_%d";

  protected static final Function<EarlybirdResponse, Map<ThriftLanguage, Integer>> LANG_MAP_GETTER =
      response -> response.getSearchResults() == null
          ? null
          : response.getSearchResults().getLanguageHistogram();

  private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.8;

  private final EarlybirdFeatureSchemaMerger featureSchemaMerger;

  // The number of partitions is not meaningful when this is invoked through multi-tier merging.
  private final int numPartitions;

  public RelevanceResponseMerger(EarlybirdRequestContext requestContext,
      List<Future<EarlybirdResponse>> responses,
      ResponseAccumulator mode,
      EarlybirdFeatureSchemaMerger featureSchemaMerger,
      int numPartitions) {
    super(requestContext, responses, mode);
    this.featureSchemaMerger = Preconditions.checkNotNull(featureSchemaMerger);
    this.numPartitions = numPartitions;
  }

  @Override
  protected double getDefaultSuccessResponseThreshold() {
    return SUCCESSFUL_RESPONSE_THRESHOLD;
  }

  @Override
  protected SearchTimerStats getMergedResponseTimer() {
    return TIMER;
  }

  @Override
  protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
    final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
    long maxId = findMaxFullySearchedStatusID();
    long minId = findMinFullySearchedStatusID();

    Preconditions.checkNotNull(searchQuery);
    Preconditions.checkState(searchQuery.isSetRankingMode());
    Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.RELEVANCE);

    // First get the results in score order (the default comparator for this merge collector).
    RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
    int totalResultSize = addResponsesToCollector(collector);
    ThriftSearchResults searchResults = collector.getAllSearchResults();

    TrimStats trimStats = trimResults(searchResults);
    featureSchemaMerger.collectAndSetFeatureSchemaInResponse(
        searchResults,
        requestContext,
        "merger_relevance_tier",
        accumulatedResponses.getSuccessResponses());

    mergedResponse.setSearchResults(searchResults);

    searchResults = mergedResponse.getSearchResults();
    searchResults
        .setHitCounts(aggregateHitCountMap())
        .setLanguageHistogram(aggregateLanguageHistograms());

    if (!accumulatedResponses.getMaxIds().isEmpty()) {
      searchResults.setMaxSearchedStatusID(maxId);
    }

    if (!accumulatedResponses.getMinIds().isEmpty()) {
      searchResults.setMinSearchedStatusID(minId);
    }

    LOG.debug("Hits: {} Removed duplicates: {}", totalResultSize, trimStats.getRemovedDupsCount());
    LOG.debug("Hash Partition'ed Earlybird call completed successfully: {}", mergedResponse);

    publishNumResultsFromPartitionStatistics(mergedResponse);

    return mergedResponse;
  }

  /**
   * If any of the partitions has an early termination, the tier merge must also early terminate.
   *
   * If a partition early terminated (we haven't fully searched that partition), and we instead
   * moved onto the next tier, there will be a gap of unsearched results.
   *
   * If our early termination condition was only if we had enough results, we could get bad quality
   * results by only looking at 20 hits when asking for 20 results.
   */
  @Override
  public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
      boolean foundEarlyTermination) {

    // Don't use computeNumResultsToKeep because if returnAllResults is true, it will be
    // Integer.MAX_VALUE and we will always log a stat that we didn't get enough results
    int resultsRequested;
    EarlybirdRequest request = requestContext.getRequest();
    if (request.isSetNumResultsToReturnAtRoot()) {
      resultsRequested = request.getNumResultsToReturnAtRoot();
    } else {
      resultsRequested = request.getSearchQuery().getCollectorParams().getNumResultsToReturn();
    }
    if (foundEarlyTermination && totalResultsFromSuccessfulShards < resultsRequested) {
      RELEVANCE_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS.increment();
    }

    return foundEarlyTermination;
  }

  /**
   * Merge language histograms from all queries.
   *
   * @return Merged per-language count map.
   */
  private Map<ThriftLanguage, Integer> aggregateLanguageHistograms() {
    Map<ThriftLanguage, Integer> totalLangCounts = new TreeMap<>(
        ResultsUtil.aggregateCountMap(
            accumulatedResponses.getSuccessResponses(), LANG_MAP_GETTER));
    if (totalLangCounts.size() > 0) {
      if (responseMessageBuilder.isDebugMode()) {
        responseMessageBuilder.append("Language Distribution:\n");
        int count = 0;
        for (Map.Entry<ThriftLanguage, Integer> entry : totalLangCounts.entrySet()) {
          responseMessageBuilder.append(
              String.format(" %10s:%6d", entry.getKey(), entry.getValue()));
          if (++count % 5 == 0) {
            responseMessageBuilder.append("\n");
          }
        }
        responseMessageBuilder.append("\n");
      }
    }
    return totalLangCounts;
  }

  /**
   * Find the min status id that has been searched. Since no results are trimmed for Relevance mode,
   * it should be the smallest among the min IDs.
   */
  private long findMinFullySearchedStatusID() {
    // The min ID should be the smallest among the min IDs
    return accumulatedResponses.getMinIds().isEmpty() ? 0
        : Collections.min(accumulatedResponses.getMinIds());
  }

  /**
   * Find the max status id that has been searched. Since no results are trimmed for Relevance mode,
   * it should be the largest among the max IDs.
   */
  private long findMaxFullySearchedStatusID() {
    // The max ID should be the largest among the max IDs
    return accumulatedResponses.getMaxIds().isEmpty() ? 0
        : Collections.max(accumulatedResponses.getMaxIds());
  }

  /**
   * Return all the searchResults except duplicates.
   *
   * @param searchResults ThriftSearchResults that hold the to be trimmed List<ThriftSearchResult>
   * @return TrimStats containing statistics about how many results were removed
   */
  private TrimStats trimResults(ThriftSearchResults searchResults) {
    if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
      // no results, no trimming needed
      return TrimStats.EMPTY_STATS;
    }

    if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
      // Not a normal search, no trimming needed
      return TrimStats.EMPTY_STATS;
    }

    TrimStats trimStats = new TrimStats();
    trimExactDups(searchResults, trimStats);

    truncateResults(searchResults, trimStats);

    return trimStats;
  }

  private void publishNumResultsFromPartitionStatistics(EarlybirdResponse mergedResponse) {

    // Keep track of all of the results that were kept after merging
    Set<Long> mergedResults =
        EarlybirdResponseUtil.getResults(mergedResponse).getResults()
            .stream()
            .map(result -> result.getId())
            .collect(Collectors.toSet());

    // For each successful response (pre merge), count how many of its results were kept post merge.
    // Increment the appropriate stat.
    for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
      if (!response.isSetEarlybirdServerStats()) {
        continue;
      }
      int numResultsKept = 0;
      for (ThriftSearchResult result
          : EarlybirdResponseUtil.getResults(response).getResults()) {
        if (mergedResults.contains(result.getId())) {
          ++numResultsKept;
        }
      }

      // We only update partition stats when the partition ID looks sane.
      String tierName = response.getEarlybirdServerStats().getTierName();
      int partition = response.getEarlybirdServerStats().getPartition();
      if (partition >= 0 && partition < numPartitions) {
        SearchCounter.export(String.format(PARTITION_NUM_RESULTS_COUNTER_NAME_FORMAT,
            tierName,
            partition))
            .add(numResultsKept);
      } else {
        SearchCounter.export(String.format(PARTITION_NUM_RESULTS_COUNTER_SKIP_STATS,
            tierName,
            partition)).increment();
      }
    }
  }
}
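The language-histogram aggregation in aggregateLanguageHistograms above boils down to summing per-language counts across the successful responses. A minimal sketch follows; it is illustrative only, with String keys standing in for ThriftLanguage and no Twitter dependencies, and is not part of the original file.

// Illustrative sketch only: per-key summation, the effect of the aggregateCountMap call above.
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

final class LanguageHistogramSketch {
  static Map<String, Integer> aggregate(List<Map<String, Integer>> histograms) {
    Map<String, Integer> total = new TreeMap<>();
    for (Map<String, Integer> histogram : histograms) {
      if (histogram == null) {
        continue; // responses without search results contribute nothing
      }
      histogram.forEach((lang, count) -> total.merge(lang, count, Integer::sum));
    }
    return total;
  }

  public static void main(String[] args) {
    System.out.println(aggregate(List.of(
        Map.of("en", 3, "ja", 1),
        Map.of("en", 2)))); // {en=5, ja=1}
  }
}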
Binary file not shown.
@ -1,356 +0,0 @@
package com.twitter.search.earlybird_root.mergers;

import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.ResponseMergerUtils;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;

/**
 * Accumulates EarlybirdResponses and determines when to early-terminate.
 */
public abstract class ResponseAccumulator {

  @VisibleForTesting
  static class MinMaxSearchedIdStats {
|
|
||||||
/** How many results did we actually check */
|
|
||||||
private final SearchCounter checkedMaxMinSearchedStatusId;
|
|
||||||
private final SearchCounter unsetMaxSearchedStatusId;
|
|
||||||
private final SearchCounter unsetMinSearchedStatusId;
|
|
||||||
private final SearchCounter unsetMaxAndMinSearchedStatusId;
|
|
||||||
private final SearchCounter sameMinMaxSearchedIdWithoutResults;
|
|
||||||
private final SearchCounter sameMinMaxSearchedIdWithOneResult;
|
|
||||||
private final SearchCounter sameMinMaxSearchedIdWithResults;
|
|
||||||
private final SearchCounter flippedMinMaxSearchedId;
|
|
||||||
|
|
||||||
MinMaxSearchedIdStats(EarlybirdRequestType requestType) {
|
|
||||||
String statPrefix = "merge_helper_" + requestType.getNormalizedName();
|
|
||||||
|
|
||||||
checkedMaxMinSearchedStatusId = SearchCounter.export(statPrefix
|
|
||||||
+ "_max_min_searched_id_checks");
|
|
||||||
unsetMaxSearchedStatusId = SearchCounter.export(statPrefix
|
|
||||||
+ "_unset_max_searched_status_id");
|
|
||||||
unsetMinSearchedStatusId = SearchCounter.export(statPrefix
|
|
||||||
+ "_unset_min_searched_status_id");
|
|
||||||
unsetMaxAndMinSearchedStatusId = SearchCounter.export(statPrefix
|
|
||||||
+ "_unset_max_and_min_searched_status_id");
|
|
||||||
sameMinMaxSearchedIdWithoutResults = SearchCounter.export(statPrefix
|
|
||||||
+ "_same_min_max_searched_id_without_results");
|
|
||||||
sameMinMaxSearchedIdWithOneResult = SearchCounter.export(statPrefix
|
|
||||||
+ "_same_min_max_searched_id_with_one_results");
|
|
||||||
sameMinMaxSearchedIdWithResults = SearchCounter.export(statPrefix
|
|
||||||
+ "_same_min_max_searched_id_with_results");
|
|
||||||
flippedMinMaxSearchedId = SearchCounter.export(statPrefix
|
|
||||||
+ "_flipped_min_max_searched_id");
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getCheckedMaxMinSearchedStatusId() {
|
|
||||||
return checkedMaxMinSearchedStatusId;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getFlippedMinMaxSearchedId() {
|
|
||||||
return flippedMinMaxSearchedId;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getUnsetMaxSearchedStatusId() {
|
|
||||||
return unsetMaxSearchedStatusId;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getUnsetMinSearchedStatusId() {
|
|
||||||
return unsetMinSearchedStatusId;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getUnsetMaxAndMinSearchedStatusId() {
|
|
||||||
return unsetMaxAndMinSearchedStatusId;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getSameMinMaxSearchedIdWithoutResults() {
|
|
||||||
return sameMinMaxSearchedIdWithoutResults;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getSameMinMaxSearchedIdWithOneResult() {
|
|
||||||
return sameMinMaxSearchedIdWithOneResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
SearchCounter getSameMinMaxSearchedIdWithResults() {
|
|
||||||
return sameMinMaxSearchedIdWithResults;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
static final Map<EarlybirdRequestType, MinMaxSearchedIdStats> MIN_MAX_SEARCHED_ID_STATS_MAP;
|
|
||||||
static {
|
|
||||||
EnumMap<EarlybirdRequestType, MinMaxSearchedIdStats> statsMap
|
|
||||||
= Maps.newEnumMap(EarlybirdRequestType.class);
|
|
||||||
for (EarlybirdRequestType earlybirdRequestType : EarlybirdRequestType.values()) {
|
|
||||||
statsMap.put(earlybirdRequestType, new MinMaxSearchedIdStats(earlybirdRequestType));
|
|
||||||
}
|
|
||||||
|
|
||||||
MIN_MAX_SEARCHED_ID_STATS_MAP = Maps.immutableEnumMap(statsMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge has encountered at least one early terminated response.
|
|
||||||
private boolean foundEarlyTermination = false;
|
|
||||||
// Empty but successful response counter (E.g. when a tier or partition is skipped)
|
|
||||||
private int successfulEmptyResponseCount = 0;
|
|
||||||
// The list of the successful responses from all earlybird futures. This does not include empty
|
|
||||||
  // responses resulting from null requests.
|
|
||||||
private final List<EarlybirdResponse> successResponses = new ArrayList<>();
|
|
||||||
// The list of the error responses from all earlybird futures.
|
|
||||||
private final List<EarlybirdResponse> errorResponses = new ArrayList<>();
|
|
||||||
// the list of max statusIds seen in each earlybird.
|
|
||||||
private final List<Long> maxIds = new ArrayList<>();
|
|
||||||
// the list of min statusIds seen in each earlybird.
|
|
||||||
private final List<Long> minIds = new ArrayList<>();
|
|
||||||
|
|
||||||
private int numResponses = 0;
|
|
||||||
|
|
||||||
private int numResultsAccumulated = 0;
|
|
||||||
private int numSearchedSegments = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a string that can be used for logging to identify a single response out of all the
|
|
||||||
* responses that are being merged.
|
|
||||||
*
|
|
||||||
* @param responseIndex the index of a response's partition or tier, depending on the type of
|
|
||||||
* responses being accumulated.
|
|
||||||
* @param numTotalResponses the total number of partitions or tiers that are being merged.
|
|
||||||
*/
|
|
||||||
public abstract String getNameForLogging(int responseIndex, int numTotalResponses);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
|
|
||||||
*
|
|
||||||
   * @param responseIndex the index of a response's partition or tier.
|
|
||||||
* @param numTotalResponses the total number of partitions or tiers that are being merged.
|
|
||||||
* @return a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
|
|
||||||
*/
|
|
||||||
public abstract String getNameForEarlybirdResponseCodeStats(
|
|
||||||
int responseIndex, int numTotalResponses);
|
|
||||||
|
|
||||||
abstract boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger);
|
|
||||||
|
|
||||||
/**
|
|
||||||
   * Adds an EarlybirdResponse to the accumulated state.
|
|
||||||
*/
|
|
||||||
public void addResponse(EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
|
|
||||||
EarlybirdRequest request,
|
|
||||||
EarlybirdResponse response) {
|
|
||||||
numResponses++;
|
|
||||||
numSearchedSegments += response.getNumSearchedSegments();
|
|
||||||
|
|
||||||
if (isSkippedResponse(response)) {
|
|
||||||
// This is an empty response, no processing is required, just need to update statistics.
|
|
||||||
successfulEmptyResponseCount++;
|
|
||||||
handleSkippedResponse(response.getResponseCode());
|
|
||||||
} else if (isErrorResponse(response)) {
|
|
||||||
errorResponses.add(response);
|
|
||||||
handleErrorResponse(response);
|
|
||||||
} else {
|
|
||||||
handleSuccessfulResponse(responseMessageBuilder, request, response);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isErrorResponse(EarlybirdResponse response) {
|
|
||||||
return !response.isSetResponseCode()
|
|
||||||
|| response.getResponseCode() != EarlybirdResponseCode.SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isSkippedResponse(EarlybirdResponse response) {
|
|
||||||
return response.isSetResponseCode()
|
|
||||||
&& (response.getResponseCode() == EarlybirdResponseCode.PARTITION_SKIPPED
|
|
||||||
|| response.getResponseCode() == EarlybirdResponseCode.TIER_SKIPPED);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Record a response corresponding to a skipped partition or skipped tier.
|
|
||||||
*/
|
|
||||||
protected abstract void handleSkippedResponse(EarlybirdResponseCode responseCode);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Handle an error response
|
|
||||||
*/
|
|
||||||
protected abstract void handleErrorResponse(EarlybirdResponse response);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Subclasses can override this to perform more successful response handling.
|
|
||||||
*/
|
|
||||||
protected void extraSuccessfulResponseHandler(EarlybirdResponse response) { }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Whether the helper is for merging results from partitions within a single tier.
|
|
||||||
*/
|
|
||||||
protected final boolean isMergingPartitionsWithinATier() {
|
|
||||||
return !isMergingAcrossTiers();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Whether the helper is for merging results across different tiers.
|
|
||||||
*/
|
|
||||||
protected abstract boolean isMergingAcrossTiers();
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Record a successful response.
|
|
||||||
*/
|
|
||||||
public final void handleSuccessfulResponse(
|
|
||||||
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
|
|
||||||
EarlybirdRequest request,
|
|
||||||
EarlybirdResponse response) {
|
|
||||||
successResponses.add(response);
|
|
||||||
if (response.isSetSearchResults()) {
|
|
||||||
ThriftSearchResults searchResults = response.getSearchResults();
|
|
||||||
numResultsAccumulated += searchResults.getResultsSize();
|
|
||||||
|
|
||||||
recordMinMaxSearchedIdsAndUpdateStats(responseMessageBuilder, request, response,
|
|
||||||
searchResults);
|
|
||||||
}
|
|
||||||
if (response.isSetEarlyTerminationInfo()
|
|
||||||
&& response.getEarlyTerminationInfo().isEarlyTerminated()) {
|
|
||||||
foundEarlyTermination = true;
|
|
||||||
}
|
|
||||||
extraSuccessfulResponseHandler(response);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void recordMinMaxSearchedIdsAndUpdateStats(
|
|
||||||
EarlybirdResponseDebugMessageBuilder responseMessageBuidler,
|
|
||||||
EarlybirdRequest request,
|
|
||||||
EarlybirdResponse response,
|
|
||||||
ThriftSearchResults searchResults) {
|
|
||||||
|
|
||||||
boolean isMaxIdSet = searchResults.isSetMaxSearchedStatusID();
|
|
||||||
boolean isMinIdSet = searchResults.isSetMinSearchedStatusID();
|
|
||||||
|
|
||||||
if (isMaxIdSet) {
|
|
||||||
maxIds.add(searchResults.getMaxSearchedStatusID());
|
|
||||||
}
|
|
||||||
if (isMinIdSet) {
|
|
||||||
minIds.add(searchResults.getMinSearchedStatusID());
|
|
||||||
}
|
|
||||||
|
|
||||||
updateMinMaxIdStats(responseMessageBuidler, request, response, searchResults, isMaxIdSet,
|
|
||||||
isMinIdSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateMinMaxIdStats(
|
|
||||||
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
|
|
||||||
EarlybirdRequest request,
|
|
||||||
EarlybirdResponse response,
|
|
||||||
ThriftSearchResults searchResults,
|
|
||||||
boolean isMaxIdSet,
|
|
||||||
boolean isMinIdSet) {
|
|
||||||
// Now just track the stats.
|
|
||||||
EarlybirdRequestType requestType = EarlybirdRequestType.of(request);
|
|
||||||
MinMaxSearchedIdStats minMaxSearchedIdStats = MIN_MAX_SEARCHED_ID_STATS_MAP.get(requestType);
|
|
||||||
|
|
||||||
minMaxSearchedIdStats.checkedMaxMinSearchedStatusId.increment();
|
|
||||||
if (isMaxIdSet && isMinIdSet) {
|
|
||||||
if (searchResults.getMinSearchedStatusID() > searchResults.getMaxSearchedStatusID()) {
|
|
||||||
// We do not expect this case to happen in production.
|
|
||||||
minMaxSearchedIdStats.flippedMinMaxSearchedId.increment();
|
|
||||||
} else if (searchResults.getResultsSize() == 0
|
|
||||||
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
|
|
||||||
minMaxSearchedIdStats.sameMinMaxSearchedIdWithoutResults.increment();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Got no results, and same min/max searched ids. Request: %s, Response: %s",
|
|
||||||
request, response);
|
|
||||||
} else if (searchResults.getResultsSize() == 1
|
|
||||||
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
|
|
||||||
minMaxSearchedIdStats.sameMinMaxSearchedIdWithOneResult.increment();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Got one results, and same min/max searched ids. Request: %s, Response: %s",
|
|
||||||
request, response);
|
|
||||||
} else if (searchResults.getMaxSearchedStatusID()
|
|
||||||
== searchResults.getMinSearchedStatusID()) {
|
|
||||||
minMaxSearchedIdStats.sameMinMaxSearchedIdWithResults.increment();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Got multiple results, and same min/max searched ids. Request: %s, Response: %s",
|
|
||||||
request, response);
|
|
||||||
}
|
|
||||||
} else if (!isMaxIdSet && isMinIdSet) {
|
|
||||||
// We do not expect this case to happen in production.
|
|
||||||
minMaxSearchedIdStats.unsetMaxSearchedStatusId.increment();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Got unset maxSearchedStatusID. Request: %s, Response: %s", request, response);
|
|
||||||
} else if (isMaxIdSet && !isMinIdSet) {
|
|
||||||
// We do not expect this case to happen in production.
|
|
||||||
minMaxSearchedIdStats.unsetMinSearchedStatusId.increment();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Got unset minSearchedStatusID. Request: %s, Response: %s", request, response);
|
|
||||||
} else {
|
|
||||||
Preconditions.checkState(!isMaxIdSet && !isMinIdSet);
|
|
||||||
minMaxSearchedIdStats.unsetMaxAndMinSearchedStatusId.increment();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Got unset maxSearchedStatusID and minSearchedStatusID. Request: %s, Response: %s",
|
|
||||||
request, response);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return partition counts with number of partitions, number of successful responses, and list of
|
|
||||||
* responses per tier.
|
|
||||||
*/
|
|
||||||
public abstract AccumulatedResponses.PartitionCounts getPartitionCounts();
|
|
||||||
|
|
||||||
public final AccumulatedResponses getAccumulatedResults() {
|
|
||||||
return new AccumulatedResponses(successResponses,
|
|
||||||
errorResponses,
|
|
||||||
maxIds,
|
|
||||||
minIds,
|
|
||||||
ResponseMergerUtils.mergeEarlyTerminationInfo(successResponses),
|
|
||||||
isMergingAcrossTiers(),
|
|
||||||
getPartitionCounts(),
|
|
||||||
getNumSearchedSegments());
|
|
||||||
}
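
  // Illustrative usage only, added here as a hedged sketch (it is not part of the original file):
  // a concrete merger is expected to feed each partition or tier response through addResponse(...)
  // and then read the combined view via getAccumulatedResults(). The names "accumulator",
  // "responses", "messageBuilder" and "request" below are placeholders.
  //
  //   for (EarlybirdResponse response : responses) {
  //     accumulator.addResponse(messageBuilder, request, response);
  //   }
  //   AccumulatedResponses accumulated = accumulator.getAccumulatedResults();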
|
|
||||||
|
|
||||||
// Getters are only intended to be used by subclasses. Other users should get data from
|
|
||||||
// AccumulatedResponses
|
|
||||||
|
|
||||||
int getNumResponses() {
|
|
||||||
return numResponses;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getNumSearchedSegments() {
|
|
||||||
return numSearchedSegments;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<EarlybirdResponse> getSuccessResponses() {
|
|
||||||
return successResponses;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getNumResultsAccumulated() {
|
|
||||||
return numResultsAccumulated;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getSuccessfulEmptyResponseCount() {
|
|
||||||
return successfulEmptyResponseCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean foundError() {
|
|
||||||
return !errorResponses.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean foundEarlyTermination() {
|
|
||||||
return foundEarlyTermination;
|
|
||||||
}
|
|
||||||
}
Binary file not shown.
@ -1,297 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
|
|
||||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
|
||||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
|
||||||
import com.twitter.util.Future;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A RecencyResponseMerger that prioritizes not losing results during pagination.
|
|
||||||
* As of now, this merger is used by Gnip to make sure that scrolling returns all results.
|
|
||||||
*
|
|
||||||
* The logic used for merging partitions is a bit tricky, because on one hand, we want to make sure
|
|
||||||
 * that we do not miss results on the next pagination request; on the other hand, we want to return as
|
|
||||||
* many results as we can, and we want to set the minSearchedStatusID of the merged response as low
|
|
||||||
* as we can, in order to minimize the number of pagination requests.
|
|
||||||
*
|
|
||||||
* The merging logic is:
|
|
||||||
*
|
|
||||||
* Realtime cluster:
|
|
||||||
* 1. merge results from all partitions
|
|
||||||
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
|
|
||||||
* on the merged response
|
|
||||||
* 3. set trimmingMinId = max(minSearchedStatusIDs of all partition responses)
|
|
||||||
* 4. trim all results to trimmingMinId
|
|
||||||
* 5. set minSearchedStatusID on the merged response to trimmingMinId
|
|
||||||
* 6. if we have more than numRequested results:
|
|
||||||
* - keep only the newest numRequested results
|
|
||||||
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
|
|
||||||
* 7. if at least one partition response is not early-terminated, set
|
|
||||||
* tierBottomId = max(minSearchedStatusIDs of all non-early-terminated responses)
|
|
||||||
* (otherwise, set tierBottomId to some undefined value: -1, Long.MAX_VALUE, etc.)
|
|
||||||
* 8. if minSearchedStatusID of the merged response is the same as tierBottomId,
|
|
||||||
* clear the early-termination flag on the merged response
|
|
||||||
*
|
|
||||||
* The logic in steps 7 and 8 can be a little tricky to understand. They basically say: when we've
|
|
||||||
* exhausted the "least deep" partition in the realtime cluster, it's time to move to the full
|
|
||||||
* archive cluster (if we keep going past the "least deep" partition, we might miss results).
|
|
||||||
*
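 * A small added illustration of steps 7 and 8, using hypothetical IDs (these numbers are not from
 * the code or from real traffic): if partition A is exhausted with minSearchedStatusID = 50 and
 * partition B is early-terminated with minSearchedStatusID = 80, then tierBottomId = 50 while the
 * merged minSearchedStatusID is 80, so the early-termination flag stays set and we keep paging
 * this tier. Once both partitions are exhausted (say with minSearchedStatusIDs 30 and 40), the
 * merged minSearchedStatusID and tierBottomId are both 40 (assuming no further truncation), the
 * flag is cleared, and the next pagination request can move on to the full archive cluster.
 *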
|
|
||||||
* Full archive cluster:
|
|
||||||
* 1. merge results from all partitions
|
|
||||||
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
|
|
||||||
* on the merged response
|
|
||||||
* 3. set trimmingMinId to:
|
|
||||||
* - max(minSearchedStatusIDs of early-terminated responses), if at least one partition response
|
|
||||||
* is early-terminated
|
|
||||||
* - min(minSearchedStatusIDs of all responses), if all partition responses are not
|
|
||||||
* early-terminated
|
|
||||||
* 4. trim all results to trimmingMinId
|
|
||||||
* 5. set minSearchedStatusID of the merged response to trimmingMinId
|
|
||||||
* 6. if we have more than numRequested results:
|
|
||||||
* - keep only the newest numRequested results
|
|
||||||
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
|
|
||||||
*
|
|
||||||
* The logic in step 3 can be a little tricky to understand. On one hand, if we always set
|
|
||||||
* trimmingMinId to the highest minSearchedStatusID, then some tweets at the very bottom of some
|
|
||||||
* partitions will never be returned. Consider the case:
|
|
||||||
*
|
|
||||||
* partition 1 has tweets 10, 8, 6
|
|
||||||
* partition 2 has tweets 9, 7, 5
|
|
||||||
*
|
|
||||||
* In this case, we would always trim all results to minId = 6, and tweet 5 would never be returned.
|
|
||||||
*
|
|
||||||
* On the other hand, if we always set trimmingMinId to the lowest minSearchedStatusID, then we
|
|
||||||
* might miss tweets from partitions that early-terminated. Consider the case:
|
|
||||||
*
|
|
||||||
* partition 1 has tweets 10, 5, 3, 1 that match our query
|
|
||||||
* partition 2 has tweets 9, 8, 7, 6, 2 that match our query
|
|
||||||
*
|
|
||||||
 * If we ask for 3 results, then partition 1 will return tweets 10, 5, 3, and partition 2 will
|
|
||||||
* return tweets 9, 8, 7. If we set trimmingMinId = min(minSearchedStatusIDs), then the next
|
|
||||||
* pagination request will have [max_id = 2], and we will miss tweet 6.
|
|
||||||
*
|
|
||||||
* So the intuition here is that if we have an early-terminated response, we cannot set
|
|
||||||
* trimmingMinId to something lower than the minSearchedStatusID returned by that partition
|
|
||||||
* (otherwise we might miss results from that partition). However, if we've exhausted all
|
|
||||||
* partitions, then it's OK to not trim any result, because tiers do not intersect, so we will not
|
|
||||||
* miss any result from the next tier once we get there.
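 *
 * Stated compactly (a sketch of the rule above, mirroring getTrimmingMinId() below):
 *
 *   trimmingMinId = anyPartitionResponseEarlyTerminated
 *       ? max(minSearchedStatusIDs of the early-terminated partition responses)
 *       : min(minSearchedStatusIDs of all partition responses)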
|
|
||||||
*/
|
|
||||||
public class StrictRecencyResponseMerger extends RecencyResponseMerger {
|
|
||||||
private static final SearchTimerStats STRICT_RECENCY_TIMER_AVG =
|
|
||||||
SearchTimerStats.export("merge_recency_strict", TimeUnit.NANOSECONDS, false, true);
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
static final EarlyTerminationTrimmingStats PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
|
|
||||||
new EarlyTerminationTrimmingStats("strict_recency_partition_merging");
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
|
|
||||||
new EarlyTerminationTrimmingStats("strict_recency_tier_merging");
|
|
||||||
|
|
||||||
private final EarlybirdCluster cluster;
|
|
||||||
|
|
||||||
public StrictRecencyResponseMerger(EarlybirdRequestContext requestContext,
|
|
||||||
List<Future<EarlybirdResponse>> responses,
|
|
||||||
ResponseAccumulator mode,
|
|
||||||
EarlybirdFeatureSchemaMerger featureSchemaMerger,
|
|
||||||
EarlybirdCluster cluster) {
|
|
||||||
super(requestContext, responses, mode, featureSchemaMerger);
|
|
||||||
this.cluster = cluster;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SearchTimerStats getMergedResponseTimer() {
|
|
||||||
return STRICT_RECENCY_TIMER_AVG;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Unlike {@link com.twitter.search.earlybird_root.mergers.RecencyResponseMerger}, this method
|
|
||||||
* takes a much simpler approach by just taking the max of the maxSearchedStatusIds.
|
|
||||||
*
|
|
||||||
* Also, when no maxSearchedStatusId is available at all, Long.MIN_VALUE is used instead of
|
|
||||||
* Long.MAX_VALUE. This ensures that we don't return any result in these cases.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
protected long findMaxFullySearchedStatusID() {
|
|
||||||
return accumulatedResponses.getMaxIds().isEmpty()
|
|
||||||
? Long.MIN_VALUE : Collections.max(accumulatedResponses.getMaxIds());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method is subtly different from the base class version: when no minSearchedStatusId is
|
|
||||||
* available at all, Long.MAX_VALUE is used instead of Long.MIN_VALUE. This ensures that we
|
|
||||||
* don't return any result in these cases.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
protected long findMinFullySearchedStatusID() {
|
|
||||||
List<Long> minIds = accumulatedResponses.getMinIds();
|
|
||||||
if (minIds.isEmpty()) {
|
|
||||||
return Long.MAX_VALUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
|
|
||||||
return getTrimmingMinId();
|
|
||||||
}
|
|
||||||
|
|
||||||
// When merging tiers, the min ID should be the smallest among the min IDs.
|
|
||||||
return Collections.min(minIds);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected TrimStats trimResults(
|
|
||||||
ThriftSearchResults searchResults, long mergedMin, long mergedMax) {
|
|
||||||
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
|
|
||||||
// no results, no trimming needed
|
|
||||||
return TrimStats.EMPTY_STATS;
|
|
||||||
}
|
|
||||||
|
|
||||||
TrimStats trimStats = new TrimStats();
|
|
||||||
trimExactDups(searchResults, trimStats);
|
|
||||||
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
|
|
||||||
int numResults = computeNumResultsToKeep();
|
|
||||||
if (searchResults.getResultsSize() > numResults) {
|
|
||||||
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - numResults);
|
|
||||||
searchResults.setResults(searchResults.getResults().subList(0, numResults));
|
|
||||||
}
|
|
||||||
|
|
||||||
return trimStats;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method is different from the base class version because when minResultId is bigger
|
|
||||||
* than currentMergedMin, we always take minResultId.
|
|
||||||
* If we don't do this, we would lose results.
|
|
||||||
*
|
|
||||||
* Illustration with an example. Assuming we are outside of the lag threshold.
|
|
||||||
* Num results requested: 3
|
|
||||||
* Response 1: min: 100 max: 900 results: 400, 500, 600
|
|
||||||
* Response 2: min: 300 max: 700 results: 350, 450, 550
|
|
||||||
*
|
|
||||||
* Merged results: 600, 550, 500
|
|
||||||
* Merged max: 900
|
|
||||||
* Merged min: we could take 300 (minId), or take 500 (minResultId).
|
|
||||||
*
|
|
||||||
* If we take minId, and use 300 as the pagination cursor, we'd lose results
|
|
||||||
* 350 and 450 when we paginate. So we have to take minResultId here.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
protected void setMergedMinSearchedStatusId(
|
|
||||||
ThriftSearchResults searchResults,
|
|
||||||
long currentMergedMin,
|
|
||||||
boolean resultsWereTrimmed) {
|
|
||||||
if (accumulatedResponses.getMinIds().isEmpty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
long minId = currentMergedMin;
|
|
||||||
if (resultsWereTrimmed
|
|
||||||
&& (searchResults != null)
|
|
||||||
&& searchResults.isSetResults()
|
|
||||||
&& (searchResults.getResultsSize() > 0)) {
|
|
||||||
List<ThriftSearchResult> results = searchResults.getResults();
|
|
||||||
minId = results.get(results.size() - 1).getId();
|
|
||||||
}
|
|
||||||
|
|
||||||
searchResults.setMinSearchedStatusID(minId);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
|
|
||||||
if (EarlybirdCluster.isArchive(cluster)) {
|
|
||||||
// We don't need to worry about the tier bottom when merging partition responses in the full
|
|
||||||
// archive cluster: if all partitions were exhausted and we didn't trim the results, then
|
|
||||||
// the early-terminated flag on the merged response will be false. If at least one partition
|
|
||||||
      // is early-terminated, or we trimmed some results, then the early-terminated flag on the
|
|
||||||
// merged response will be true, and we should continue getting results from this tier before
|
|
||||||
// we move to the next one.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
|
|
||||||
if (searchResults.getMinSearchedStatusID() == getTierBottomId()) {
|
|
||||||
mergedResponse.getEarlyTerminationInfo().setEarlyTerminated(false);
|
|
||||||
mergedResponse.getEarlyTerminationInfo().unsetMergedEarlyTerminationReasons();
|
|
||||||
responseMessageBuilder.debugVerbose(
|
|
||||||
"Set earlytermination to false because minSearchedStatusId is tier bottom");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
|
|
||||||
return PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
|
|
||||||
return TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Determines the bottom of the realtime cluster, based on the partition responses. */
|
|
||||||
private long getTierBottomId() {
|
|
||||||
Preconditions.checkState(!EarlybirdCluster.isArchive(cluster));
|
|
||||||
|
|
||||||
long tierBottomId = -1;
|
|
||||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
|
||||||
if (!isEarlyTerminated(response)
|
|
||||||
&& response.isSetSearchResults()
|
|
||||||
&& response.getSearchResults().isSetMinSearchedStatusID()
|
|
||||||
&& (response.getSearchResults().getMinSearchedStatusID() > tierBottomId)) {
|
|
||||||
tierBottomId = response.getSearchResults().getMinSearchedStatusID();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return tierBottomId;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Determines the minId to which all results should be trimmed. */
|
|
||||||
private long getTrimmingMinId() {
|
|
||||||
List<Long> minIds = accumulatedResponses.getMinIds();
|
|
||||||
Preconditions.checkArgument(!minIds.isEmpty());
|
|
||||||
|
|
||||||
if (!EarlybirdCluster.isArchive(cluster)) {
|
|
||||||
return Collections.max(minIds);
|
|
||||||
}
|
|
||||||
|
|
||||||
long maxOfEarlyTerminatedMins = -1;
|
|
||||||
long minOfAllMins = Long.MAX_VALUE;
|
|
||||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
|
||||||
if (response.isSetSearchResults()
|
|
||||||
&& response.getSearchResults().isSetMinSearchedStatusID()) {
|
|
||||||
long minId = response.getSearchResults().getMinSearchedStatusID();
|
|
||||||
minOfAllMins = Math.min(minOfAllMins, minId);
|
|
||||||
if (isEarlyTerminated(response)) {
|
|
||||||
maxOfEarlyTerminatedMins = Math.max(maxOfEarlyTerminatedMins, minId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (maxOfEarlyTerminatedMins >= 0) {
|
|
||||||
return maxOfEarlyTerminatedMins;
|
|
||||||
} else {
|
|
||||||
return minOfAllMins;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Determines if the given earlybird response is early terminated. */
|
|
||||||
private boolean isEarlyTerminated(EarlybirdResponse response) {
|
|
||||||
return response.isSetEarlyTerminationInfo()
|
|
||||||
&& response.getEarlyTerminationInfo().isEarlyTerminated();
|
|
||||||
}
|
|
||||||
}
Binary file not shown.
@ -1,688 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import javax.annotation.Nullable;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
import com.google.common.cache.CacheBuilder;
|
|
||||||
import com.google.common.cache.CacheLoader;
|
|
||||||
import com.google.common.cache.LoadingCache;
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.twitter.common.collections.Pair;
|
|
||||||
import com.twitter.common.quantity.Amount;
|
|
||||||
import com.twitter.common.quantity.Time;
|
|
||||||
import com.twitter.common.util.Clock;
|
|
||||||
import com.twitter.search.common.futures.Futures;
|
|
||||||
import com.twitter.search.common.metrics.SearchCounter;
|
|
||||||
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
|
|
||||||
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
|
|
||||||
import com.twitter.search.common.relevance.utils.ResultComparators;
|
|
||||||
import com.twitter.search.common.search.EarlyTerminationState;
|
|
||||||
import com.twitter.search.common.util.FinagleUtil;
|
|
||||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
|
|
||||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdServiceResponse;
|
|
||||||
import com.twitter.util.Function;
|
|
||||||
import com.twitter.util.Function0;
|
|
||||||
import com.twitter.util.Future;
|
|
||||||
|
|
||||||
/** Utility functions for merging recency and relevance results. */
|
|
||||||
public class SuperRootResponseMerger {
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(SuperRootResponseMerger.class);
|
|
||||||
private static final String ALL_STATS_PREFIX = "superroot_response_merger_";
|
|
||||||
|
|
||||||
private static final SearchCounter FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID =
|
|
||||||
SearchCounter.export("full_archive_min_id_greater_than_realtime_min_id");
|
|
||||||
|
|
||||||
private static final String ERROR_FORMAT = "%s%s_errors_from_cluster_%s_%s";
|
|
||||||
|
|
||||||
private final ThriftSearchRankingMode rankingMode;
|
|
||||||
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
|
|
||||||
private final String featureStatPrefix;
|
|
||||||
private final Clock clock;
|
|
||||||
private final String rankingModeStatPrefix;
|
|
||||||
|
|
||||||
private final SearchCounter mergedResponseSearchResultsNotSet;
|
|
||||||
private final SearchCounter invalidMinStatusId;
|
|
||||||
private final SearchCounter invalidMaxStatusId;
|
|
||||||
private final SearchCounter noMinIds;
|
|
||||||
private final SearchCounter noMaxIds;
|
|
||||||
private final SearchCounter mergedResponses;
|
|
||||||
private final SearchCounter mergedResponsesWithExactDups;
|
|
||||||
private final LoadingCache<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter> dupsStats;
|
|
||||||
|
|
||||||
private static final EarlybirdResponse EMPTY_RESPONSE =
|
|
||||||
new EarlybirdResponse(EarlybirdResponseCode.SUCCESS, 0)
|
|
||||||
.setSearchResults(new ThriftSearchResults()
|
|
||||||
.setResults(Lists.<ThriftSearchResult>newArrayList()));
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new SuperRootResponseMerger instance.
|
|
||||||
* @param rankingMode The ranking mode to use when merging results.
|
|
||||||
* @param featureSchemaMerger The merger that can merge feature schema from different tiers.
|
|
||||||
* @param clock The clock that will be used to merge results.
|
|
||||||
*/
|
|
||||||
public SuperRootResponseMerger(ThriftSearchRankingMode rankingMode,
|
|
||||||
EarlybirdFeatureSchemaMerger featureSchemaMerger,
|
|
||||||
Clock clock) {
|
|
||||||
this.rankingModeStatPrefix = rankingMode.name().toLowerCase();
|
|
||||||
|
|
||||||
this.rankingMode = rankingMode;
|
|
||||||
this.featureSchemaMerger = featureSchemaMerger;
|
|
||||||
this.clock = clock;
|
|
||||||
this.featureStatPrefix = "superroot_" + rankingMode.name().toLowerCase();
|
|
||||||
|
|
||||||
mergedResponseSearchResultsNotSet = SearchCounter.export(
|
|
||||||
ALL_STATS_PREFIX + rankingModeStatPrefix + "_merged_response_search_results_not_set");
|
|
||||||
invalidMinStatusId =
|
|
||||||
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_invalid_min_status_id");
|
|
||||||
invalidMaxStatusId =
|
|
||||||
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_invalid_max_status_id");
|
|
||||||
noMinIds = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_no_min_ids");
|
|
||||||
noMaxIds = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_no_max_ids");
|
|
||||||
mergedResponses = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix
|
|
||||||
+ "_merged_responses");
|
|
||||||
mergedResponsesWithExactDups =
|
|
||||||
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix
|
|
||||||
+ "_merged_responses_with_exact_dups");
|
|
||||||
dupsStats = CacheBuilder.newBuilder()
|
|
||||||
.build(new CacheLoader<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter>() {
|
|
||||||
@Override
|
|
||||||
public SearchCounter load(Pair<ThriftTweetSource, ThriftTweetSource> key) {
|
|
||||||
return SearchCounter.export(
|
|
||||||
ALL_STATS_PREFIX + rankingModeStatPrefix + "_merged_responses_with_exact_dups_"
|
|
||||||
+ key.getFirst().name() + "_" + key.getSecond().name());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
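
  // Illustrative usage only, added as a hedged sketch (not part of the original file). The
  // variable names are placeholders, and ThriftSearchRankingMode.RECENCY / Clock.SYSTEM_CLOCK are
  // assumed to be the relevant constants:
  //
  //   SuperRootResponseMerger merger = new SuperRootResponseMerger(
  //       ThriftSearchRankingMode.RECENCY, featureSchemaMerger, Clock.SYSTEM_CLOCK);
  //   Future<EarlybirdResponse> merged = merger.mergeResponseFutures(
  //       requestContext, realtimeFuture, protectedFuture, fullArchiveFuture);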
|
|
||||||
|
|
||||||
private void incrErrorCount(String cluster, @Nullable EarlybirdResponse response) {
|
|
||||||
String cause;
|
|
||||||
if (response != null) {
|
|
||||||
cause = response.getResponseCode().name().toLowerCase();
|
|
||||||
} else {
|
|
||||||
cause = "null_response";
|
|
||||||
}
|
|
||||||
String statName = String.format(
|
|
||||||
ERROR_FORMAT, ALL_STATS_PREFIX, rankingModeStatPrefix, cluster, cause
|
|
||||||
);
|
|
||||||
|
|
||||||
SearchCounter.export(statName).increment();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merges the given response futures.
|
|
||||||
*
|
|
||||||
* @param earlybirdRequestContext The earlybird request.
|
|
||||||
* @param realtimeResponseFuture The response from the realtime cluster.
|
|
||||||
* @param protectedResponseFuture The response from the protected cluster.
|
|
||||||
* @param fullArchiveResponseFuture The response from the full archive cluster.
|
|
||||||
* @return A future with the merged results.
|
|
||||||
*/
|
|
||||||
public Future<EarlybirdResponse> mergeResponseFutures(
|
|
||||||
final EarlybirdRequestContext earlybirdRequestContext,
|
|
||||||
final Future<EarlybirdServiceResponse> realtimeResponseFuture,
|
|
||||||
final Future<EarlybirdServiceResponse> protectedResponseFuture,
|
|
||||||
final Future<EarlybirdServiceResponse> fullArchiveResponseFuture) {
|
|
||||||
Future<EarlybirdResponse> mergedResponseFuture = Futures.map(
|
|
||||||
realtimeResponseFuture, protectedResponseFuture, fullArchiveResponseFuture,
|
|
||||||
new Function0<EarlybirdResponse>() {
|
|
||||||
@Override
|
|
||||||
public EarlybirdResponse apply() {
|
|
||||||
// If the realtime response is not valid, return an error response.
|
|
||||||
// Also, the realtime service should always be called.
|
|
||||||
EarlybirdServiceResponse realtimeResponse = Futures.get(realtimeResponseFuture);
|
|
||||||
|
|
||||||
if (realtimeResponse.getServiceState().serviceWasRequested()
|
|
||||||
&& (!realtimeResponse.getServiceState().serviceWasCalled()
|
|
||||||
|| !EarlybirdResponseMergeUtil.isValidResponse(
|
|
||||||
realtimeResponse.getResponse()))) {
|
|
||||||
|
|
||||||
incrErrorCount("realtime", realtimeResponse.getResponse());
|
|
||||||
return EarlybirdResponseMergeUtil.transformInvalidResponse(
|
|
||||||
realtimeResponse.getResponse(), "realtime");
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have a protected response and it's not valid, return an error response.
|
|
||||||
EarlybirdServiceResponse protectedResponse = Futures.get(protectedResponseFuture);
|
|
||||||
if (protectedResponse.getServiceState().serviceWasCalled()) {
|
|
||||||
if (!EarlybirdResponseMergeUtil.isValidResponse(protectedResponse.getResponse())) {
|
|
||||||
incrErrorCount("protected", protectedResponse.getResponse());
|
|
||||||
|
|
||||||
return EarlybirdResponseMergeUtil.transformInvalidResponse(
|
|
||||||
protectedResponse.getResponse(), "protected");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have a full archive response, check if it's valid.
|
|
||||||
EarlybirdServiceResponse fullArchiveResponse = Futures.get(fullArchiveResponseFuture);
|
|
||||||
boolean archiveHasError =
|
|
||||||
fullArchiveResponse.getServiceState().serviceWasCalled()
|
|
||||||
&& !EarlybirdResponseMergeUtil.isValidResponse(fullArchiveResponse.getResponse());
|
|
||||||
|
|
||||||
// Merge the responses.
|
|
||||||
EarlybirdResponse mergedResponse = mergeResponses(
|
|
||||||
earlybirdRequestContext,
|
|
||||||
realtimeResponse.getResponse(),
|
|
||||||
protectedResponse.getResponse(),
|
|
||||||
fullArchiveResponse.getResponse());
|
|
||||||
|
|
||||||
// If the realtime clusters didn't return any results, and the full archive cluster
|
|
||||||
// returned an error response, return an error merged response.
|
|
||||||
if (archiveHasError && !EarlybirdResponseUtil.hasResults(mergedResponse)) {
|
|
||||||
incrErrorCount("full_archive", fullArchiveResponse.getResponse());
|
|
||||||
|
|
||||||
return EarlybirdResponseMergeUtil.failedEarlybirdResponse(
|
|
||||||
fullArchiveResponse.getResponse().getResponseCode(),
|
|
||||||
"realtime clusters had no results and archive cluster response had error");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Corner case: the realtime response could have exactly numRequested results, and could
|
|
||||||
// be exhausted (not early-terminated). In this case, the request should not have been
|
|
||||||
// sent to the full archive cluster.
|
|
||||||
// - If the full archive cluster is not available, or was not requested, then we don't
|
|
||||||
// need to change anything.
|
|
||||||
// - If the full archive cluster is available and was requested (but wasn't hit
|
|
||||||
// because we found enough results in the realtime cluster), then we should set the
|
|
||||||
// early-termination flag on the merged response, to indicate that we potentially
|
|
||||||
// have more results for this query in our index.
|
|
||||||
if ((fullArchiveResponse.getServiceState()
|
|
||||||
== EarlybirdServiceResponse.ServiceState.SERVICE_NOT_CALLED)
|
|
||||||
&& !EarlybirdResponseUtil.isEarlyTerminated(realtimeResponse.getResponse())) {
|
|
||||||
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
|
|
||||||
earlyTerminationInfo.setEarlyTerminationReason(
|
|
||||||
EarlyTerminationState.TERMINATED_NUM_RESULTS_EXCEEDED.getTerminationReason());
|
|
||||||
mergedResponse.setEarlyTerminationInfo(earlyTerminationInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we've exhausted all clusters, set the minSearchedStatusID to 0.
|
|
||||||
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
|
|
||||||
mergedResponse.getSearchResults().setMinSearchedStatusID(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mergedResponse;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Handle all merging exceptions.
|
|
||||||
return handleResponseException(mergedResponseFuture,
|
|
||||||
"Exception thrown while merging responses.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merge the results in the given responses.
|
|
||||||
*
|
|
||||||
* @param earlybirdRequestContext The earlybird request context.
|
|
||||||
* @param realtimeResponse The response from the realtime cluster.
|
|
||||||
* @param protectedResponse The response from the protected cluster.
|
|
||||||
* @param fullArchiveResponse The response from the full archive cluster.
|
|
||||||
* @return The merged response.
|
|
||||||
*/
|
|
||||||
private EarlybirdResponse mergeResponses(
|
|
||||||
EarlybirdRequestContext earlybirdRequestContext,
|
|
||||||
@Nullable EarlybirdResponse realtimeResponse,
|
|
||||||
@Nullable EarlybirdResponse protectedResponse,
|
|
||||||
@Nullable EarlybirdResponse fullArchiveResponse) {
|
|
||||||
|
|
||||||
EarlybirdRequest request = earlybirdRequestContext.getRequest();
|
|
||||||
ThriftSearchQuery searchQuery = request.getSearchQuery();
|
|
||||||
int numResultsRequested;
|
|
||||||
|
|
||||||
if (request.isSetNumResultsToReturnAtRoot()) {
|
|
||||||
numResultsRequested = request.getNumResultsToReturnAtRoot();
|
|
||||||
} else {
|
|
||||||
numResultsRequested = searchQuery.getNumResults();
|
|
||||||
}
|
|
||||||
|
|
||||||
Preconditions.checkState(numResultsRequested > 0);
|
|
||||||
|
|
||||||
EarlybirdResponse mergedResponse = EMPTY_RESPONSE.deepCopy();
|
|
||||||
if ((realtimeResponse != null)
|
|
||||||
&& (realtimeResponse.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
|
|
||||||
mergedResponse = realtimeResponse.deepCopy();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!mergedResponse.isSetSearchResults()) {
|
|
||||||
mergedResponseSearchResultsNotSet.increment();
|
|
||||||
mergedResponse.setSearchResults(
|
|
||||||
new ThriftSearchResults(Lists.<ThriftSearchResult>newArrayList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// If either the realtime or the full archive response is early-terminated, we want the merged
|
|
||||||
// response to be early-terminated too. The early-termination flag from the realtime response
|
|
||||||
// carries over to the merged response, because mergedResponse is just a deep copy of the
|
|
||||||
// realtime response. So we only need to check the early-termination flag of the full archive
|
|
||||||
// response.
|
|
||||||
if ((fullArchiveResponse != null)
|
|
||||||
&& EarlybirdResponseUtil.isEarlyTerminated(fullArchiveResponse)) {
|
|
||||||
mergedResponse.setEarlyTerminationInfo(fullArchiveResponse.getEarlyTerminationInfo());
|
|
||||||
}
|
|
||||||
|
|
||||||
// If realtime has empty results and protected has some results then we copy the early
|
|
||||||
// termination information if that is present
|
|
||||||
if (protectedResponse != null
|
|
||||||
&& mergedResponse.getSearchResults().getResults().isEmpty()
|
|
||||||
&& !protectedResponse.getSearchResults().getResults().isEmpty()
|
|
||||||
&& EarlybirdResponseUtil.isEarlyTerminated(protectedResponse)) {
|
|
||||||
mergedResponse.setEarlyTerminationInfo(protectedResponse.getEarlyTerminationInfo());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge the results.
|
|
||||||
List<ThriftSearchResult> mergedResults = mergeResults(
|
|
||||||
numResultsRequested, realtimeResponse, protectedResponse, fullArchiveResponse);
|
|
||||||
|
|
||||||
// Trim the merged results if necessary.
|
|
||||||
boolean resultsTrimmed = false;
|
|
||||||
if (mergedResults.size() > numResultsRequested
|
|
||||||
&& !(searchQuery.isSetRelevanceOptions()
|
|
||||||
&& searchQuery.getRelevanceOptions().isReturnAllResults())) {
|
|
||||||
// If we have more results than requested, trim the result list and re-adjust
|
|
||||||
// minSearchedStatusID.
|
|
||||||
mergedResults = mergedResults.subList(0, numResultsRequested);
|
|
||||||
|
|
||||||
// Mark early termination in merged response
|
|
||||||
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
|
|
||||||
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
|
|
||||||
earlyTerminationInfo.setEarlyTerminationReason(
|
|
||||||
EarlyTerminationState.TERMINATED_NUM_RESULTS_EXCEEDED.getTerminationReason());
|
|
||||||
mergedResponse.setEarlyTerminationInfo(earlyTerminationInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
resultsTrimmed = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
mergedResponse.getSearchResults().setResults(mergedResults);
|
|
||||||
featureSchemaMerger.mergeFeatureSchemaAcrossClusters(
|
|
||||||
earlybirdRequestContext,
|
|
||||||
mergedResponse,
|
|
||||||
featureStatPrefix,
|
|
||||||
realtimeResponse,
|
|
||||||
protectedResponse,
|
|
||||||
fullArchiveResponse);
|
|
||||||
|
|
||||||
// Set the minSearchedStatusID and maxSearchedStatusID fields on the merged response.
|
|
||||||
setMinSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse, fullArchiveResponse,
|
|
||||||
resultsTrimmed);
|
|
||||||
setMaxSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse,
|
|
||||||
fullArchiveResponse);
|
|
||||||
|
|
||||||
int numRealtimeSearchedSegments =
|
|
||||||
(realtimeResponse != null && realtimeResponse.isSetNumSearchedSegments())
|
|
||||||
? realtimeResponse.getNumSearchedSegments()
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
int numProtectedSearchedSegments =
|
|
||||||
(protectedResponse != null && protectedResponse.isSetNumSearchedSegments())
|
|
||||||
? protectedResponse.getNumSearchedSegments()
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
int numArchiveSearchedSegments =
|
|
||||||
(fullArchiveResponse != null && fullArchiveResponse.isSetNumSearchedSegments())
|
|
||||||
? fullArchiveResponse.getNumSearchedSegments()
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
mergedResponse.setNumSearchedSegments(
|
|
||||||
numRealtimeSearchedSegments + numProtectedSearchedSegments + numArchiveSearchedSegments);
|
|
||||||
|
|
||||||
if (earlybirdRequestContext.getRequest().getDebugMode() > 0) {
|
|
||||||
mergedResponse.setDebugString(
|
|
||||||
mergeClusterDebugStrings(realtimeResponse, protectedResponse, fullArchiveResponse));
|
|
||||||
}
|
|
||||||
|
|
||||||
return mergedResponse;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merges the given responses.
|
|
||||||
*
|
|
||||||
* @param numResults the number of results requested
|
|
||||||
* @param realtimeResponse the response from the realtime response
|
|
||||||
* @param protectedResponse the response from the protected response
|
|
||||||
* @param fullArchiveResponse the response from the full archive response
|
|
||||||
* @return the list of merged results
|
|
||||||
*/
|
|
||||||
private List<ThriftSearchResult> mergeResults(int numResults,
|
|
||||||
@Nullable EarlybirdResponse realtimeResponse,
|
|
||||||
@Nullable EarlybirdResponse protectedResponse,
|
|
||||||
@Nullable EarlybirdResponse fullArchiveResponse) {
|
|
||||||
mergedResponses.increment();
|
|
||||||
// We first merge the results from the two realtime clusters, Realtime cluster and
|
|
||||||
// Realtime Protected Tweets cluster
|
|
||||||
List<ThriftSearchResult> mergedResults = mergePublicAndProtectedRealtimeResults(
|
|
||||||
numResults,
|
|
||||||
realtimeResponse,
|
|
||||||
protectedResponse,
|
|
||||||
fullArchiveResponse,
|
|
||||||
clock);
|
|
||||||
|
|
||||||
EarlybirdResponseMergeUtil.addResultsToList(mergedResults, fullArchiveResponse,
|
|
||||||
ThriftTweetSource.FULL_ARCHIVE_CLUSTER);
|
|
||||||
|
|
||||||
List<ThriftSearchResult> distinctMergedResults =
|
|
||||||
EarlybirdResponseMergeUtil.distinctByStatusId(mergedResults, dupsStats);
|
|
||||||
if (mergedResults != distinctMergedResults) {
|
|
||||||
mergedResponsesWithExactDups.increment();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rankingMode == ThriftSearchRankingMode.RELEVANCE
|
|
||||||
|| rankingMode == ThriftSearchRankingMode.TOPTWEETS) {
|
|
||||||
distinctMergedResults.sort(ResultComparators.SCORE_COMPARATOR);
|
|
||||||
} else {
|
|
||||||
distinctMergedResults.sort(ResultComparators.ID_COMPARATOR);
|
|
||||||
}
|
|
||||||
|
|
||||||
return distinctMergedResults;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method for merging tweets from protected and realtime clusters
|
|
||||||
* - realtime, guaranteed newer than any archive tweets
|
|
||||||
* - protected, also realtime, but with a potentially larger window (optional)
|
|
||||||
* - archive, public, guaranteed older than any public realtime tweets (optional, used for
|
|
||||||
* id limits, *not added to results*)
|
|
||||||
* It adds the ThriftSearchResults from protected tweets to the realtimeResponse
|
|
||||||
*
|
|
||||||
* Algorithm diagram: (with newer tweets at the top)
|
|
||||||
* ------------------------------------ <--- protected maxSearchedStatusID
|
|
||||||
* |C:Newest protected realtime tweets|
|
|
||||||
* | (does not exist if realtime |
|
|
||||||
* | maxID >= protected maxID) |
|
|
||||||
*
|
|
||||||
* | ------------------------ | <--- 60 seconds ago
|
|
||||||
* |D:Newer protected realtime tweets |
|
|
||||||
* | (does not exist if realtime |
|
|
||||||
* | maxID >= 60 seconds ago) |
|
|
||||||
* ---------- | ------------------------ | <--- public realtime maxSearchedStatusID
|
|
||||||
* |A:Public| |E:Automatically valid protected |
|
|
||||||
* |realtime| |realtime tweets |
|
|
||||||
* ---------- | ------------------------ | <--- public realtime minSearchedStatusID
|
|
||||||
* | |
|
|
||||||
 * ---------- | E if archive is present     | <--- public archive maxSearchedStatusID
 * |B:Public| | F if archive is not present |
|
|
||||||
* |archive | | |
|
|
||||||
* ---------- | ------------------------ | <--- public archive minSearchedStatusID
|
|
||||||
* |F:Older protected realtime tweets |
|
|
||||||
* | (does not exist if protected |
|
|
||||||
* | minID >= public minID) |
|
|
||||||
* ------------------------------------ <--- protected minSearchedStatusID
|
|
||||||
* Step 1: Select tweets from groups A, and E. If this is enough, return them
|
|
||||||
* Step 2: Select tweets from groups A, E, and F. If this is enough, return them
|
|
||||||
* Step 3: Select tweets from groups A, D, E, and F and return them
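 *
 * (Added, hedged illustration with made-up counts: if numRequested is 10 and groups A and E
 * together contain only 7 tweets, step 2 also pulls in group F; if that is still not enough,
 * step 3 additionally widens the window to "60 seconds ago" so that group D becomes eligible.)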
|
|
||||||
*
|
|
||||||
* There are two primary tradeoffs, both of which favor public tweets:
|
|
||||||
* (1) Benefit: While public indexing latency is < 60s, auto-updating never misses public tweets
|
|
||||||
* Cost: Absence of public tweets may delay protected tweets from being searchable for 60s
|
|
||||||
* (2) Benefit: No failure or delay from the protected cluster will affect realtime results
|
|
||||||
* Cost: If the protected cluster indexes more slowly, auto-update may miss its tweets
|
|
||||||
*
|
|
||||||
* @param fullArchiveTweets - used solely for generating anchor points, not merged in.
|
|
||||||
*/
|
|
||||||
@VisibleForTesting
|
|
||||||
static List<ThriftSearchResult> mergePublicAndProtectedRealtimeResults(
|
|
||||||
int numRequested,
|
|
||||||
EarlybirdResponse realtimeTweets,
|
|
||||||
EarlybirdResponse realtimeProtectedTweets,
|
|
||||||
@Nullable EarlybirdResponse fullArchiveTweets,
|
|
||||||
Clock clock) {
|
|
||||||
// See which results will actually be used
|
|
||||||
boolean isRealtimeUsable = EarlybirdResponseUtil.hasResults(realtimeTweets);
|
|
||||||
boolean isArchiveUsable = EarlybirdResponseUtil.hasResults(fullArchiveTweets);
|
|
||||||
boolean isProtectedUsable = EarlybirdResponseUtil.hasResults(realtimeProtectedTweets);
|
|
||||||
|
|
||||||
long minId = Long.MIN_VALUE;
|
|
||||||
long maxId = Long.MAX_VALUE;
|
|
||||||
if (isRealtimeUsable) {
|
|
||||||
// Determine the actual upper/lower bounds on the tweet id
|
|
||||||
if (realtimeTweets.getSearchResults().isSetMinSearchedStatusID()) {
|
|
||||||
minId = realtimeTweets.getSearchResults().getMinSearchedStatusID();
|
|
||||||
}
|
|
||||||
if (realtimeTweets.getSearchResults().isSetMaxSearchedStatusID()) {
|
|
||||||
maxId = realtimeTweets.getSearchResults().getMaxSearchedStatusID();
|
|
||||||
}
|
|
||||||
|
|
||||||
int justRight = realtimeTweets.getSearchResults().getResultsSize();
|
|
||||||
if (isArchiveUsable) {
|
|
||||||
justRight += fullArchiveTweets.getSearchResults().getResultsSize();
|
|
||||||
if (fullArchiveTweets.getSearchResults().isSetMinSearchedStatusID()) {
|
|
||||||
long fullArchiveMinId = fullArchiveTweets.getSearchResults().getMinSearchedStatusID();
|
|
||||||
if (fullArchiveMinId <= minId) {
|
|
||||||
minId = fullArchiveMinId;
|
|
||||||
} else {
|
|
||||||
FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID.increment();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (isProtectedUsable) {
|
|
||||||
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
|
|
||||||
if (result.getId() >= minId && result.getId() <= maxId) {
|
|
||||||
justRight++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (justRight < numRequested) {
|
|
||||||
// Since this is only used as an upper bound, old (pre-2010) ids are still handled correctly
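        // (Added note, hedged: this appears to widen maxId to roughly "60 seconds ago", matching
        // the 60-second line in the diagram above, so that newer protected tweets in group D can
        // be considered when the public clusters alone did not yield numRequested results.)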
|
|
||||||
maxId = Math.max(
|
|
||||||
maxId,
|
|
||||||
SnowflakeIdParser.generateValidStatusId(
|
|
||||||
clock.nowMillis() - Amount.of(60, Time.SECONDS).as(Time.MILLISECONDS), 0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
List<ThriftSearchResult> mergedSearchResults = Lists.newArrayListWithCapacity(numRequested * 2);
|
|
||||||
|
|
||||||
// Add valid tweets in order of priority: protected, then realtime
|
|
||||||
// Only add results that are within range (that check only matters for protected)
|
|
||||||
if (isProtectedUsable) {
|
|
||||||
EarlybirdResponseMergeUtil.markWithTweetSource(
|
|
||||||
realtimeProtectedTweets.getSearchResults().getResults(),
|
|
||||||
ThriftTweetSource.REALTIME_PROTECTED_CLUSTER);
|
|
||||||
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
|
|
||||||
if (result.getId() <= maxId && result.getId() >= minId) {
|
|
||||||
mergedSearchResults.add(result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isRealtimeUsable) {
|
|
||||||
EarlybirdResponseMergeUtil.addResultsToList(
|
|
||||||
mergedSearchResults, realtimeTweets, ThriftTweetSource.REALTIME_CLUSTER);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the minSearchedStatusID and maxSearchedStatusID on the protected response to the
|
|
||||||
// minId and maxId that were used to trim the protected results.
|
|
||||||
// This is needed in order to correctly set these IDs on the merged response.
|
|
||||||
ThriftSearchResults protectedResults =
|
|
||||||
EarlybirdResponseUtil.getResults(realtimeProtectedTweets);
|
|
||||||
if ((protectedResults != null)
|
|
||||||
&& protectedResults.isSetMinSearchedStatusID()
|
|
||||||
&& (protectedResults.getMinSearchedStatusID() < minId)) {
|
|
||||||
protectedResults.setMinSearchedStatusID(minId);
|
|
||||||
}
|
|
||||||
if ((protectedResults != null)
|
|
||||||
&& protectedResults.isSetMaxSearchedStatusID()
|
|
||||||
&& (protectedResults.getMaxSearchedStatusID() > maxId)) {
|
|
||||||
realtimeProtectedTweets.getSearchResults().setMaxSearchedStatusID(maxId);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mergedSearchResults;
|
|
||||||
}
|
|
||||||
|
|
||||||
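The 60-second anchor above works because Snowflake status IDs embed their creation time, so an upper-bound ID can be derived from a wall-clock cutoff. A minimal sketch of that derivation, assuming the publicly documented Snowflake layout (millisecond timestamp relative to the Twitter epoch, shifted above 22 low-order worker/sequence bits); the helper below is a hypothetical stand-in for SnowflakeIdParser.generateValidStatusId, not the production implementation.

import java.util.concurrent.TimeUnit;

final class SnowflakeUpperBoundSketch {
  // Assumption: the well-known Twitter Snowflake epoch, 2010-11-04T01:42:54.657Z.
  private static final long TWITTER_EPOCH_MS = 1288834974657L;
  // Assumption: 22 low-order bits hold datacenter/worker/sequence; the timestamp sits above them.
  private static final int TIMESTAMP_LEFT_SHIFT = 22;

  // Smallest status ID that could have been generated at timestampMs.
  static long statusIdForTimestamp(long timestampMs) {
    return (timestampMs - TWITTER_EPOCH_MS) << TIMESTAMP_LEFT_SHIFT;
  }

  public static void main(String[] args) {
    long cutoffMs = System.currentTimeMillis() - TimeUnit.SECONDS.toMillis(60);
    // Tweets with id <= maxId are at least ~60 seconds old, so the public realtime cluster
    // should already have indexed them; merging protected tweets up to this anchor is safe.
    long maxId = statusIdForTimestamp(cutoffMs);
    System.out.println("auto-update anchor maxId = " + maxId);
  }
}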
/**
 * Merges the debug strings of the given cluster responses.
 *
 * @param realtimeResponse The response from the realtime cluster.
 * @param protectedResponse The response from the protected cluster.
 * @param fullArchiveResponse The response from the full archive cluster.
 * @return The merged debug string.
 */
public static String mergeClusterDebugStrings(@Nullable EarlybirdResponse realtimeResponse,
                                              @Nullable EarlybirdResponse protectedResponse,
                                              @Nullable EarlybirdResponse fullArchiveResponse) {
  StringBuilder sb = new StringBuilder();
  if ((realtimeResponse != null) && realtimeResponse.isSetDebugString()) {
    sb.append("Realtime response: ").append(realtimeResponse.getDebugString());
  }
  if ((protectedResponse != null) && protectedResponse.isSetDebugString()) {
    if (sb.length() > 0) {
      sb.append("\n");
    }
    sb.append("Protected response: ").append(protectedResponse.getDebugString());
  }
  if ((fullArchiveResponse != null) && fullArchiveResponse.isSetDebugString()) {
    if (sb.length() > 0) {
      sb.append("\n");
    }
    sb.append("Full archive response: ").append(fullArchiveResponse.getDebugString());
  }

  if (sb.length() == 0) {
    return null;
  }
  return sb.toString();
}
/**
 * Sets the minSearchedStatusID field on the merged response.
 *
 * @param mergedResponse The merged response.
 * @param realtimeResponse The realtime cluster response.
 * @param protectedResponse The protected cluster response.
 * @param fullArchiveResponse The full archive response.
 * @param resultsTrimmed Whether the merged response results were trimmed.
 */
private void setMinSearchedStatusId(EarlybirdResponse mergedResponse,
                                    EarlybirdResponse realtimeResponse,
                                    EarlybirdResponse protectedResponse,
                                    EarlybirdResponse fullArchiveResponse,
                                    boolean resultsTrimmed) {
  Preconditions.checkNotNull(mergedResponse.getSearchResults());
  if (resultsTrimmed) {
    // We got more results than we asked for and trimmed them.
    // Set minSearchedStatusID to the ID of the oldest result.
    ThriftSearchResults searchResults = mergedResponse.getSearchResults();
    if (searchResults.getResultsSize() > 0) {
      List<ThriftSearchResult> results = searchResults.getResults();
      long lastResultId = results.get(results.size() - 1).getId();
      searchResults.setMinSearchedStatusID(lastResultId);
    }
    return;
  }

  // We did not get more results than we asked for. Take the min of the minSearchedStatusIDs of
  // the merged responses.
  List<Long> minIDs = Lists.newArrayList();
  if (fullArchiveResponse != null
      && fullArchiveResponse.isSetSearchResults()
      && fullArchiveResponse.getSearchResults().isSetMinSearchedStatusID()) {
    minIDs.add(fullArchiveResponse.getSearchResults().getMinSearchedStatusID());
    if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()
        && mergedResponse.getSearchResults().getMinSearchedStatusID()
            < fullArchiveResponse.getSearchResults().getMinSearchedStatusID()) {
      invalidMinStatusId.increment();
    }
  }

  if (protectedResponse != null
      && !EarlybirdResponseUtil.hasResults(realtimeResponse)
      && EarlybirdResponseUtil.hasResults(protectedResponse)
      && protectedResponse.getSearchResults().isSetMinSearchedStatusID()) {
    minIDs.add(protectedResponse.getSearchResults().getMinSearchedStatusID());
  }

  if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
    minIDs.add(mergedResponse.getSearchResults().getMinSearchedStatusID());
  }

  if (!minIDs.isEmpty()) {
    mergedResponse.getSearchResults().setMinSearchedStatusID(Collections.min(minIDs));
  } else {
    noMinIds.increment();
  }
}
/**
 * Sets the maxSearchedStatusID field on the merged response.
 *
 * @param mergedResponse The merged response.
 * @param realtimeResponse The realtime cluster response.
 * @param protectedResponse The protected cluster response.
 * @param fullArchiveResponse The full archive response.
 */
private void setMaxSearchedStatusId(EarlybirdResponse mergedResponse,
                                    EarlybirdResponse realtimeResponse,
                                    EarlybirdResponse protectedResponse,
                                    EarlybirdResponse fullArchiveResponse) {
  Preconditions.checkNotNull(mergedResponse.getSearchResults());
  List<Long> maxIDs = Lists.newArrayList();
  if (fullArchiveResponse != null
      && fullArchiveResponse.isSetSearchResults()
      && fullArchiveResponse.getSearchResults().isSetMaxSearchedStatusID()) {
    maxIDs.add(fullArchiveResponse.getSearchResults().getMaxSearchedStatusID());
    if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()
        && fullArchiveResponse.getSearchResults().getMaxSearchedStatusID()
            > mergedResponse.getSearchResults().getMaxSearchedStatusID()) {
      invalidMaxStatusId.increment();
    }
  }

  if (protectedResponse != null
      && !EarlybirdResponseUtil.hasResults(realtimeResponse)
      && EarlybirdResponseUtil.hasResults(protectedResponse)
      && protectedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
    maxIDs.add(protectedResponse.getSearchResults().getMaxSearchedStatusID());
  }

  if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
    maxIDs.add(mergedResponse.getSearchResults().getMaxSearchedStatusID());
  }

  ThriftSearchResults searchResults = mergedResponse.getSearchResults();
  if (searchResults.getResultsSize() > 0) {
    List<ThriftSearchResult> results = searchResults.getResults();
    maxIDs.add(results.get(0).getId());
  }

  if (!maxIDs.isEmpty()) {
    mergedResponse.getSearchResults().setMaxSearchedStatusID(Collections.max(maxIDs));
  } else {
    noMaxIds.increment();
  }
}
/**
 * Handles exceptions thrown while merging responses. Timeout exceptions are converted to
 * SERVER_TIMEOUT_ERROR responses. All other exceptions are converted to PERSISTENT_ERROR
 * responses.
 */
private Future<EarlybirdResponse> handleResponseException(
    Future<EarlybirdResponse> responseFuture, final String debugMsg) {
  return responseFuture.handle(
      new Function<Throwable, EarlybirdResponse>() {
        @Override
        public EarlybirdResponse apply(Throwable t) {
          EarlybirdResponseCode responseCode = EarlybirdResponseCode.PERSISTENT_ERROR;
          if (FinagleUtil.isTimeoutException(t)) {
            responseCode = EarlybirdResponseCode.SERVER_TIMEOUT_ERROR;
          }
          EarlybirdResponse response = new EarlybirdResponse(responseCode, 0);
          response.setDebugString(debugMsg + "\n" + t);
          return response;
        }
      });
}
}
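handleResponseException above is the Twitter-util Future flavor of a common pattern: convert any failure of an asynchronous call into a well-formed error response rather than letting the exception escape the merge pipeline. A rough sketch of the same idea on the JDK's CompletableFuture, with a plain TimeoutException check standing in for FinagleUtil.isTimeoutException and a hypothetical Response class standing in for the Thrift EarlybirdResponse.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException;

final class FallbackResponseSketch {
  // Hypothetical response holder; the real code uses the Thrift EarlybirdResponse struct.
  static final class Response {
    final String code;
    final String debugString;
    Response(String code, String debugString) {
      this.code = code;
      this.debugString = debugString;
    }
    @Override public String toString() {
      return code + ": " + debugString;
    }
  }

  static CompletableFuture<Response> withFallback(CompletableFuture<Response> future,
                                                  String debugMsg) {
    return future.exceptionally(t -> {
      // Unwrap the CompletionException that CompletableFuture may add around the real cause.
      Throwable cause = (t.getCause() != null) ? t.getCause() : t;
      String code = (cause instanceof TimeoutException)
          ? "SERVER_TIMEOUT_ERROR"
          : "PERSISTENT_ERROR";
      return new Response(code, debugMsg + "\n" + cause);
    });
  }

  public static void main(String[] args) {
    CompletableFuture<Response> failed =
        CompletableFuture.failedFuture(new TimeoutException("backend timed out"));
    System.out.println(withFallback(failed, "merging realtime + protected").join());
  }
}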
Binary file not shown.
@ -1,90 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import com.google.common.collect.Collections2;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
|
||||||
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsRequest;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
|
||||||
import com.twitter.util.Future;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merger class to merge termstats EarlybirdResponse objects
|
|
||||||
*/
|
|
||||||
public class TermStatisticsResponseMerger extends EarlybirdResponseMerger {
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(TermStatisticsResponseMerger.class);
|
|
||||||
|
|
||||||
private static final SearchTimerStats TIMER =
|
|
||||||
SearchTimerStats.export("merge_term_stats", TimeUnit.NANOSECONDS, false, true);
|
|
||||||
|
|
||||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
|
|
||||||
|
|
||||||
public TermStatisticsResponseMerger(EarlybirdRequestContext requestContext,
|
|
||||||
List<Future<EarlybirdResponse>> responses,
|
|
||||||
ResponseAccumulator mode) {
|
|
||||||
super(requestContext, responses, mode);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SearchTimerStats getMergedResponseTimer() {
|
|
||||||
return TIMER;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected double getDefaultSuccessResponseThreshold() {
|
|
||||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected EarlybirdResponse internalMerge(EarlybirdResponse termStatsResponse) {
|
|
||||||
ThriftTermStatisticsRequest termStatisticsRequest =
|
|
||||||
requestContext.getRequest().getTermStatisticsRequest();
|
|
||||||
|
|
||||||
Collection<EarlybirdResponse> termStatsResults =
|
|
||||||
Collections2.filter(accumulatedResponses.getSuccessResponses(),
|
|
||||||
earlybirdResponse -> earlybirdResponse.isSetTermStatisticsResults());
|
|
||||||
|
|
||||||
ThriftTermStatisticsResults results =
|
|
||||||
new ThriftTermResultsMerger(
|
|
||||||
termStatsResults,
|
|
||||||
termStatisticsRequest.getHistogramSettings())
|
|
||||||
.merge();
|
|
||||||
|
|
||||||
if (results.getTermResults().isEmpty()) {
|
|
||||||
final String line = "No results returned from any backend for term statistics request: {}";
|
|
||||||
|
|
||||||
// If the term stats request was not empty and we got empty results, log it as a warning;
// otherwise log it as a debug message.
if (termStatisticsRequest.getTermRequestsSize() > 0) {
|
|
||||||
LOG.warn(line, termStatisticsRequest);
|
|
||||||
} else {
|
|
||||||
LOG.debug(line, termStatisticsRequest);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
termStatsResponse.setTermStatisticsResults(results);
|
|
||||||
termStatsResponse.setSearchResults(ThriftTermResultsMerger.mergeSearchStats(termStatsResults));
|
|
||||||
|
|
||||||
FacetsResultsUtils.fixNativePhotoUrl(results.getTermResults().values());
|
|
||||||
|
|
||||||
LOG.debug("TermStats call completed successfully: {}", termStatsResponse);
|
|
||||||
|
|
||||||
return termStatsResponse;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
|
||||||
boolean foundEarlyTermination) {
|
|
||||||
// To get accurate term stats, must never early terminate
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
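The 0.9 SUCCESSFUL_RESPONSE_THRESHOLD above is consumed by the base merger (not shown in this diff); the usual reading is that a term-stats merge is only trusted when at least 90% of the queried partitions answered successfully. A minimal sketch of that kind of check, under that assumption:

final class SuccessThresholdSketch {
  // Assumption: a merge is acceptable when successfulPartitions / queriedPartitions >= threshold.
  static boolean enoughPartitionsResponded(int successfulPartitions,
                                           int queriedPartitions,
                                           double threshold) {
    if (queriedPartitions == 0) {
      return false; // nothing was queried, so there is nothing to trust
    }
    return (double) successfulPartitions / queriedPartitions >= threshold;
  }

  public static void main(String[] args) {
    System.out.println(enoughPartitionsResponded(18, 20, 0.9)); // true: 0.90 >= 0.90
    System.out.println(enoughPartitionsResponded(17, 20, 0.9)); // false: 0.85 < 0.90
  }
}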
Binary file not shown.
@ -1,472 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
|
||||||
import javax.annotation.Nullable;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.twitter.search.common.metrics.SearchCounter;
|
|
||||||
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftHistogramSettings;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftTermRequest;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftTermResults;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes multiple successful EarlybirdResponses and merges them.
|
|
||||||
*/
|
|
||||||
public class ThriftTermResultsMerger {
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(ThriftTermResultsMerger.class);
|
|
||||||
|
|
||||||
private static final SearchCounter BIN_ID_GAP_COUNTER =
|
|
||||||
SearchCounter.export("thrift_term_results_merger_found_gap_in_bin_ids");
|
|
||||||
private static final SearchCounter MIN_COMPLETE_BIN_ID_ADJUSTED_NULL =
|
|
||||||
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_adjusted_null");
|
|
||||||
private static final SearchCounter MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS =
|
|
||||||
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_null_without_bins");
|
|
||||||
private static final SearchCounter MIN_COMPLETE_BIN_ID_OUT_OF_RANGE =
|
|
||||||
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_out_of_range");
|
|
||||||
private static final SearchCounter RESPONSE_WITHOUT_DRIVING_QUERY_HIT =
|
|
||||||
SearchCounter.export("response_without_driving_query_hit");
|
|
||||||
|
|
||||||
private static final ThriftTermRequest GLOBAL_COUNT_REQUEST =
|
|
||||||
new ThriftTermRequest().setFieldName("").setTerm("");
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sorted list of the most recent (and contiguous) numBins binIds across all responses.
|
|
||||||
* Expected to be an empty list if this request did not ask for histograms, or if it
|
|
||||||
* did ask for histograms for 0 numBins.
|
|
||||||
*/
|
|
||||||
@Nonnull
|
|
||||||
private final List<Integer> mostRecentBinIds;
|
|
||||||
/**
|
|
||||||
* The first binId in the {@link #mostRecentBinIds} list. This value is not meant to be used in
|
|
||||||
* case mostRecentBinIds is an empty list.
|
|
||||||
*/
|
|
||||||
private final int firstBinId;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For each unique ThriftTermRequest, stores an array of the total counts for all the binIds
|
|
||||||
* that we will return, summed up across all earlybird responses.
|
|
||||||
*
|
|
||||||
* The values in each totalCounts array correspond to the binIds in the
|
|
||||||
* {@link #mostRecentBinIds} list.
|
|
||||||
*
|
|
||||||
* Key: thrift term request.
|
|
||||||
* Value: array of the total counts summed up across all earlybird responses for the key's
|
|
||||||
* term request, corresponding to the binIds in {@link #mostRecentBinIds}.
|
|
||||||
*/
|
|
||||||
private final Map<ThriftTermRequest, int[]> mergedTermRequestTotalCounts = Maps.newHashMap();
|
|
||||||
/**
 * Merged term results for each unique term request, accumulated across all responses.
 */
private final Map<ThriftTermRequest, ThriftTermResults> termResultsMap = Maps.newHashMap();
|
|
||||||
private final ThriftHistogramSettings histogramSettings;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Only relevant for merging responses with histogram settings.
|
|
||||||
* This will be null either if (1) the request is not asking for histograms at all, or if
|
|
||||||
* (2) numBins was set to 0 (and no bin can be considered complete).
|
|
||||||
* If not null, the minCompleteBinId will be computed as the max over all merged responses'
|
|
||||||
* minCompleteBinId's.
|
|
||||||
*/
|
|
||||||
@Nullable
|
|
||||||
private final Integer minCompleteBinId;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create merger with collections of results to merge
|
|
||||||
*/
|
|
||||||
public ThriftTermResultsMerger(Collection<EarlybirdResponse> termStatsResults,
|
|
||||||
ThriftHistogramSettings histogramSettings) {
|
|
||||||
this.histogramSettings = histogramSettings;
|
|
||||||
|
|
||||||
Collection<EarlybirdResponse> filteredTermStatsResults =
|
|
||||||
filterOutEmptyEarlybirdResponses(termStatsResults);
|
|
||||||
|
|
||||||
this.mostRecentBinIds = findMostRecentBinIds(histogramSettings, filteredTermStatsResults);
|
|
||||||
this.firstBinId = mostRecentBinIds.isEmpty()
|
|
||||||
? Integer.MAX_VALUE // Should not be used if mostRecentBinIds is empty.
|
|
||||||
: mostRecentBinIds.get(0);
|
|
||||||
|
|
||||||
List<Integer> minCompleteBinIds =
|
|
||||||
Lists.newArrayListWithCapacity(filteredTermStatsResults.size());
|
|
||||||
for (EarlybirdResponse response : filteredTermStatsResults) {
|
|
||||||
Preconditions.checkState(response.getResponseCode() == EarlybirdResponseCode.SUCCESS,
|
|
||||||
"Unsuccessful responses should not be given to ThriftTermResultsMerger.");
|
|
||||||
Preconditions.checkState(response.getTermStatisticsResults() != null,
|
|
||||||
"Response given to ThriftTermResultsMerger has no termStatisticsResults.");
|
|
||||||
|
|
||||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
|
||||||
List<Integer> binIds = termStatisticsResults.getBinIds();
|
|
||||||
|
|
||||||
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry
|
|
||||||
: termStatisticsResults.getTermResults().entrySet()) {
|
|
||||||
ThriftTermRequest termRequest = entry.getKey();
|
|
||||||
ThriftTermResults termResults = entry.getValue();
|
|
||||||
|
|
||||||
adjustTotalCount(termResults, binIds);
|
|
||||||
addTotalCountData(termRequest, termResults);
|
|
||||||
|
|
||||||
if (histogramSettings != null) {
|
|
||||||
Preconditions.checkState(termStatisticsResults.isSetBinIds());
|
|
||||||
addHistogramData(termRequest, termResults, termStatisticsResults.getBinIds());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (histogramSettings != null) {
|
|
||||||
addMinCompleteBinId(minCompleteBinIds, response);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
minCompleteBinId = minCompleteBinIds.isEmpty() ? null : Collections.max(minCompleteBinIds);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Take out any earlybird responses that we know did not match anything relevant to the query,
|
|
||||||
* and may have erroneous binIds.
|
|
||||||
*/
|
|
||||||
private Collection<EarlybirdResponse> filterOutEmptyEarlybirdResponses(
|
|
||||||
Collection<EarlybirdResponse> termStatsResults) {
|
|
||||||
List<EarlybirdResponse> emptyResponses = Lists.newArrayList();
|
|
||||||
List<EarlybirdResponse> nonEmptyResponses = Lists.newArrayList();
|
|
||||||
for (EarlybirdResponse response : termStatsResults) {
|
|
||||||
// Guard against erroneously merging and returning 0 counts when we actually have data to
|
|
||||||
// return from other partitions.
|
|
||||||
// When a query doesn't match anything at all on an earlybird, the binIds that are returned
|
|
||||||
// do not correspond at all to the actual query, and are just based on the data range on the
|
|
||||||
// earlybird itself.
|
|
||||||
// We can identify these responses as (1) being non-early terminated, and (2) having 0
|
|
||||||
// hits processed.
|
|
||||||
if (isTermStatResponseEmpty(response)) {
|
|
||||||
emptyResponses.add(response);
|
|
||||||
} else {
|
|
||||||
nonEmptyResponses.add(response);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If all responses were "empty", we will just use those to merge into a new set of empty
|
|
||||||
// responses, using the binIds provided.
|
|
||||||
return nonEmptyResponses.isEmpty() ? emptyResponses : nonEmptyResponses;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isTermStatResponseEmpty(EarlybirdResponse response) {
|
|
||||||
return response.isSetSearchResults()
|
|
||||||
&& (response.getSearchResults().getNumHitsProcessed() == 0
|
|
||||||
|| drivingQueryHasNoHits(response))
|
|
||||||
&& response.isSetEarlyTerminationInfo()
|
|
||||||
&& !response.getEarlyTerminationInfo().isEarlyTerminated();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If the global count bins are all 0, then we know the driving query has no hits.
|
|
||||||
 * This check was added as a short-term fix for SEARCH-5476, and it only kicks in when the
 * client sets includeGlobalCounts on the request.
*/
|
|
||||||
private boolean drivingQueryHasNoHits(EarlybirdResponse response) {
|
|
||||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
|
||||||
if (termStatisticsResults == null || termStatisticsResults.getTermResults() == null) {
|
|
||||||
// If there's no term stats response, be conservative and return false.
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
ThriftTermResults globalCounts =
|
|
||||||
termStatisticsResults.getTermResults().get(GLOBAL_COUNT_REQUEST);
|
|
||||||
if (globalCounts == null) {
|
|
||||||
// We cannot tell if driving query has no hits, be conservative and return false.
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
for (Integer i : globalCounts.getHistogramBins()) {
|
|
||||||
if (i > 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
RESPONSE_WITHOUT_DRIVING_QUERY_HIT.increment();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<Integer> findMostRecentBinIds(
|
|
||||||
ThriftHistogramSettings histogramSettings,
|
|
||||||
Collection<EarlybirdResponse> filteredTermStatsResults) {
|
|
||||||
Integer largestFirstBinId = null;
|
|
||||||
List<Integer> binIdsToUse = null;
|
|
||||||
|
|
||||||
if (histogramSettings != null) {
|
|
||||||
int numBins = histogramSettings.getNumBins();
|
|
||||||
for (EarlybirdResponse response : filteredTermStatsResults) {
|
|
||||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
|
||||||
Preconditions.checkState(termStatisticsResults.getBinIds().size() == numBins,
|
|
||||||
"expected all results to have the same numBins. "
|
|
||||||
+ "request numBins: %s, response numBins: %s",
|
|
||||||
numBins, termStatisticsResults.getBinIds().size());
|
|
||||||
|
|
||||||
if (termStatisticsResults.getBinIds().size() > 0) {
|
|
||||||
Integer firstBinId = termStatisticsResults.getBinIds().get(0);
|
|
||||||
if (largestFirstBinId == null
|
|
||||||
|| largestFirstBinId.intValue() < firstBinId.intValue()) {
|
|
||||||
largestFirstBinId = firstBinId;
|
|
||||||
binIdsToUse = termStatisticsResults.getBinIds();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return binIdsToUse == null
|
|
||||||
? Collections.<Integer>emptyList()
|
|
||||||
// Just in case, make a copy of the binIds so that we don't reuse the same list from one
|
|
||||||
// of the responses we're merging.
|
|
||||||
: Lists.newArrayList(binIdsToUse);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addMinCompleteBinId(List<Integer> minCompleteBinIds,
|
|
||||||
EarlybirdResponse response) {
|
|
||||||
Preconditions.checkNotNull(histogramSettings);
|
|
||||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
|
||||||
|
|
||||||
if (termStatisticsResults.isSetMinCompleteBinId()) {
|
|
||||||
// This is the base case. Early terminated or not, this is the proper minCompleteBinId
|
|
||||||
// that we're told to use for this response.
|
|
||||||
minCompleteBinIds.add(termStatisticsResults.getMinCompleteBinId());
|
|
||||||
} else if (termStatisticsResults.getBinIds().size() > 0) {
|
|
||||||
// This is the case where no bins were complete. For the purposes of merging, we need to
|
|
||||||
// mark all the binIds in this response as non-complete by marking the "max(binId)+1" as the
|
|
||||||
// last complete bin.
|
|
||||||
// When returning the merged response, we still have a guard for the resulting
|
|
||||||
// minCompleteBinId being outside of the binIds range, and will set the returned
|
|
||||||
// minCompleteBinId value to null, if this response's binIds end up being used as the most
|
|
||||||
// recent ones, and we need to signify that none of the bins are complete.
|
|
||||||
int binSize = termStatisticsResults.getBinIds().size();
|
|
||||||
Integer maxBinId = termStatisticsResults.getBinIds().get(binSize - 1);
|
|
||||||
minCompleteBinIds.add(maxBinId + 1);
|
|
||||||
|
|
||||||
LOG.debug("Adjusting null minCompleteBinId for response: {}, histogramSettings {}",
|
|
||||||
response, histogramSettings);
|
|
||||||
MIN_COMPLETE_BIN_ID_ADJUSTED_NULL.increment();
|
|
||||||
} else {
|
|
||||||
// This should only happen in the case where numBins is set to 0.
|
|
||||||
Preconditions.checkState(histogramSettings.getNumBins() == 0,
|
|
||||||
"Expected numBins set to 0. response: %s", response);
|
|
||||||
Preconditions.checkState(minCompleteBinIds.isEmpty(),
|
|
||||||
"minCompleteBinIds: %s", minCompleteBinIds);
|
|
||||||
|
|
||||||
LOG.debug("Got null minCompleteBinId with no bins for response: {}, histogramSettings {}",
|
|
||||||
response, histogramSettings);
|
|
||||||
MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS.increment();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addTotalCountData(ThriftTermRequest request, ThriftTermResults results) {
|
|
||||||
ThriftTermResults termResults = termResultsMap.get(request);
|
|
||||||
if (termResults == null) {
|
|
||||||
termResultsMap.put(request, results);
|
|
||||||
} else {
|
|
||||||
termResults.setTotalCount(termResults.getTotalCount() + results.getTotalCount());
|
|
||||||
if (termResults.isSetMetadata()) {
|
|
||||||
termResults.setMetadata(
|
|
||||||
FacetsResultsUtils.mergeFacetMetadata(termResults.getMetadata(),
|
|
||||||
results.getMetadata(), null));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set results.totalCount to the sum of hits in only the bins that will be returned in
|
|
||||||
* the merged response.
|
|
||||||
*/
|
|
||||||
private void adjustTotalCount(ThriftTermResults results, List<Integer> binIds) {
|
|
||||||
int adjustedTotalCount = 0;
|
|
||||||
List<Integer> histogramBins = results.getHistogramBins();
|
|
||||||
if ((binIds != null) && (histogramBins != null)) {
|
|
||||||
Preconditions.checkState(
|
|
||||||
histogramBins.size() == binIds.size(),
|
|
||||||
"Expected ThriftTermResults to have the same number of histogramBins as binIds set in "
|
|
||||||
+ " ThriftTermStatisticsResults. ThriftTermResults.histogramBins: %s, "
|
|
||||||
+ " ThriftTermStatisticsResults.binIds: %s.",
|
|
||||||
histogramBins, binIds);
|
|
||||||
for (int i = 0; i < binIds.size(); ++i) {
|
|
||||||
if (binIds.get(i) >= firstBinId) {
|
|
||||||
adjustedTotalCount += histogramBins.get(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
results.setTotalCount(adjustedTotalCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addHistogramData(ThriftTermRequest request,
|
|
||||||
ThriftTermResults results,
|
|
||||||
List<Integer> binIds) {
|
|
||||||
|
|
||||||
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
|
|
||||||
if (requestTotalCounts == null) {
|
|
||||||
requestTotalCounts = new int[mostRecentBinIds.size()];
|
|
||||||
mergedTermRequestTotalCounts.put(request, requestTotalCounts);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only consider these results if they fall into the mostRecentBinIds range.
|
|
||||||
//
|
|
||||||
// The list of returned binIds is expected to be both sorted (in ascending order), and
|
|
||||||
// contiguous, which allows us to use firstBinId to check if it overlaps with the
|
|
||||||
// mostRecentBinIds range.
|
|
||||||
if (binIds.size() > 0 && binIds.get(binIds.size() - 1) >= firstBinId) {
|
|
||||||
int firstBinIndex;
|
|
||||||
if (binIds.get(0) == firstBinId) {
|
|
||||||
// This should be the common case when all partitions have the same binIds,
|
|
||||||
// no need to do a binary search.
|
|
||||||
firstBinIndex = 0;
|
|
||||||
} else {
|
|
||||||
// The firstBinId must be in the binIds range. We can find it using binary search since
|
|
||||||
// binIds are sorted.
|
|
||||||
firstBinIndex = Collections.binarySearch(binIds, firstBinId);
|
|
||||||
Preconditions.checkState(firstBinIndex >= 0,
|
|
||||||
"Expected to find firstBinId (%s) in the result binIds: %s, "
|
|
||||||
+ "histogramSettings: %s, termRequest: %s",
|
|
||||||
firstBinId, binIds, histogramSettings, request);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip binIds that are before the smallest binId that we will use in the merged results.
|
|
||||||
for (int i = firstBinIndex; i < binIds.size(); i++) {
|
|
||||||
final Integer currentBinValue = results.getHistogramBins().get(i);
|
|
||||||
requestTotalCounts[i - firstBinIndex] += currentBinValue.intValue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return a new ThriftTermStatisticsResults with the total counts merged, and if enabled,
|
|
||||||
* histogram bins merged.
|
|
||||||
*/
|
|
||||||
public ThriftTermStatisticsResults merge() {
|
|
||||||
ThriftTermStatisticsResults results = new ThriftTermStatisticsResults(termResultsMap);
|
|
||||||
|
|
||||||
if (histogramSettings != null) {
|
|
||||||
mergeHistogramBins(results);
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes multiple histogram results and merges them so:
|
|
||||||
* 1) Counts for the same binId (represents the time) and term are summed
|
|
||||||
* 2) All results are re-indexed to use the most recent bins found from the union of all bins
|
|
||||||
*/
|
|
||||||
private void mergeHistogramBins(ThriftTermStatisticsResults mergedResults) {
|
|
||||||
|
|
||||||
mergedResults.setBinIds(mostRecentBinIds);
|
|
||||||
mergedResults.setHistogramSettings(histogramSettings);
|
|
||||||
|
|
||||||
setMinCompleteBinId(mergedResults);
|
|
||||||
|
|
||||||
useMostRecentBinsForEachThriftTermResults();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void setMinCompleteBinId(ThriftTermStatisticsResults mergedResults) {
|
|
||||||
if (mostRecentBinIds.isEmpty()) {
|
|
||||||
Preconditions.checkState(minCompleteBinId == null);
|
|
||||||
// This is the case where the requested numBins is set to 0. We don't have any binIds,
|
|
||||||
// and the minCompleteBinId has to be unset.
|
|
||||||
LOG.debug("Empty binIds returned for mergedResults: {}", mergedResults);
|
|
||||||
} else {
|
|
||||||
Preconditions.checkNotNull(minCompleteBinId);
|
|
||||||
|
|
||||||
Integer maxBinId = mostRecentBinIds.get(mostRecentBinIds.size() - 1);
|
|
||||||
if (minCompleteBinId <= maxBinId) {
|
|
||||||
mergedResults.setMinCompleteBinId(minCompleteBinId);
|
|
||||||
} else {
|
|
||||||
// Leaving the minCompleteBinId unset as it is outside the range of the returned binIds.
|
|
||||||
LOG.debug("Computed minCompleteBinId: {} is out of maxBinId: {} for mergedResults: {}",
|
|
||||||
minCompleteBinId, mergedResults);
|
|
||||||
MIN_COMPLETE_BIN_ID_OUT_OF_RANGE.increment();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check that the binIds we are using are contiguous. Increment the provided stat if we find
|
|
||||||
* a gap, as we don't expect to find any.
|
|
||||||
* See: SEARCH-4362
|
|
||||||
*
|
|
||||||
* @param sortedBinIds most recent numBins sorted binIds.
|
|
||||||
* @param binIdGapCounter stat to increment if we see a gap in the binId range.
|
|
||||||
*/
|
|
||||||
@VisibleForTesting
|
|
||||||
static void checkForBinIdGaps(List<Integer> sortedBinIds, SearchCounter binIdGapCounter) {
|
|
||||||
for (int i = sortedBinIds.size() - 1; i > 0; i--) {
|
|
||||||
final Integer currentBinId = sortedBinIds.get(i);
|
|
||||||
final Integer previousBinId = sortedBinIds.get(i - 1);
|
|
||||||
|
|
||||||
if (previousBinId < currentBinId - 1) {
|
|
||||||
binIdGapCounter.increment();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a view containing only the last N items from the list
|
|
||||||
*/
|
|
||||||
private static <E> List<E> takeLastN(List<E> lst, int n) {
|
|
||||||
Preconditions.checkArgument(n <= lst.size(),
|
|
||||||
"Attempting to take more elements than the list has. List size: %s, n: %s", lst.size(), n);
|
|
||||||
return lst.subList(lst.size() - n, lst.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void useMostRecentBinsForEachThriftTermResults() {
|
|
||||||
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry : termResultsMap.entrySet()) {
|
|
||||||
ThriftTermRequest request = entry.getKey();
|
|
||||||
ThriftTermResults results = entry.getValue();
|
|
||||||
|
|
||||||
List<Integer> histogramBins = Lists.newArrayList();
|
|
||||||
results.setHistogramBins(histogramBins);
|
|
||||||
|
|
||||||
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
|
|
||||||
Preconditions.checkNotNull(requestTotalCounts);
|
|
||||||
|
|
||||||
for (int totalCount : requestTotalCounts) {
|
|
||||||
histogramBins.add(totalCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merges search stats from several earlybird responses and puts them in
|
|
||||||
* {@link ThriftSearchResults} structure.
|
|
||||||
*
|
|
||||||
* @param responses earlybird responses to merge the search stats from
|
|
||||||
* @return merged search stats inside of {@link ThriftSearchResults} structure
|
|
||||||
*/
|
|
||||||
public static ThriftSearchResults mergeSearchStats(Collection<EarlybirdResponse> responses) {
|
|
||||||
int numHitsProcessed = 0;
|
|
||||||
int numPartitionsEarlyTerminated = 0;
|
|
||||||
|
|
||||||
for (EarlybirdResponse response : responses) {
|
|
||||||
ThriftSearchResults searchResults = response.getSearchResults();
|
|
||||||
|
|
||||||
if (searchResults != null) {
|
|
||||||
numHitsProcessed += searchResults.getNumHitsProcessed();
|
|
||||||
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ThriftSearchResults searchResults = new ThriftSearchResults(new ArrayList<>());
|
|
||||||
searchResults.setNumHitsProcessed(numHitsProcessed);
|
|
||||||
searchResults.setNumPartitionsEarlyTerminated(numPartitionsEarlyTerminated);
|
|
||||||
return searchResults;
|
|
||||||
}
|
|
||||||
}
|
|
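The histogram merge in ThriftTermResultsMerger re-indexes every response's bins onto the most recent, contiguous numBins binIds (the ones with the largest firstBinId) and sums the counts per bin. A small self-contained sketch of that re-indexing with made-up binIds and counts; the real code additionally validates contiguity and uses binary search to locate firstBinId.

import java.util.Arrays;
import java.util.List;

final class BinReindexSketch {
  // Sum each response's counts into the slots of the most recent, contiguous binId range.
  static int[] mergeOntoMostRecentBins(List<int[]> responseBinIds,
                                       List<int[]> responseCounts,
                                       int[] mostRecentBinIds) {
    int firstBinId = mostRecentBinIds[0];
    int[] merged = new int[mostRecentBinIds.length];
    for (int r = 0; r < responseBinIds.size(); r++) {
      int[] binIds = responseBinIds.get(r);
      int[] counts = responseCounts.get(r);
      for (int i = 0; i < binIds.length; i++) {
        int slot = binIds[i] - firstBinId; // relies on binIds being sorted and contiguous
        if (slot >= 0 && slot < merged.length) {
          merged[slot] += counts[i];
        }
      }
    }
    return merged;
  }

  public static void main(String[] args) {
    // Response A lags by one bin; response B has the most recent bins {101, 102, 103}.
    List<int[]> binIds = Arrays.asList(new int[] {100, 101, 102}, new int[] {101, 102, 103});
    List<int[]> counts = Arrays.asList(new int[] {5, 7, 2}, new int[] {3, 4, 6});
    int[] merged = mergeOntoMostRecentBins(binIds, counts, new int[] {101, 102, 103});
    System.out.println(Arrays.toString(merged)); // [10, 6, 6] for bins 101, 102, 103
  }
}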
Binary file not shown.
@ -1,97 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
|
||||||
import com.twitter.search.earlybird.thrift.TierResponse;
|
|
||||||
|
|
||||||
public final class TierResponseAccumulator extends ResponseAccumulator {
|
|
||||||
private static final String TARGET_TYPE_TIER = "tier";
|
|
||||||
|
|
||||||
private final List<TierResponse> tierResponses = new ArrayList<>();
|
|
||||||
// Total number of partitions the request was sent to, across all tiers.
|
|
||||||
private int totalPartitionsQueriedInAllTiers = 0;
|
|
||||||
// Among the above partitions, the number of them that returned successful responses.
|
|
||||||
private int totalSuccessfulPartitionsInAllTiers = 0;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getNameForLogging(int responseIndex, int numTotalResponses) {
|
|
||||||
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
|
|
||||||
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean isMergingAcrossTiers() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
|
|
||||||
if (foundError()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int numResults = 0;
|
|
||||||
for (EarlybirdResponse resp : getSuccessResponses()) {
|
|
||||||
if (resp.isSetSearchResults()) {
|
|
||||||
numResults += resp.getSearchResults().getResultsSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return merger.shouldEarlyTerminateTierMerge(numResults, foundEarlyTermination());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void handleSkippedResponse(EarlybirdResponseCode responseCode) {
|
|
||||||
tierResponses.add(new TierResponse()
|
|
||||||
.setNumPartitions(0)
|
|
||||||
.setNumSuccessfulPartitions(0)
|
|
||||||
.setTierResponseCode(responseCode));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void handleErrorResponse(EarlybirdResponse response) {
|
|
||||||
// Build a TierResponse, which is only returned when merging results from different tiers.
|
|
||||||
TierResponse tr = new TierResponse();
|
|
||||||
if (response != null) {
|
|
||||||
if (response.isSetResponseCode()) {
|
|
||||||
tr.setTierResponseCode(response.getResponseCode());
|
|
||||||
} else {
|
|
||||||
tr.setTierResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
|
|
||||||
}
|
|
||||||
tr.setNumPartitions(response.getNumPartitions());
|
|
||||||
tr.setNumSuccessfulPartitions(0);
|
|
||||||
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
|
|
||||||
} else {
|
|
||||||
tr.setTierResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR)
|
|
||||||
.setNumPartitions(0)
|
|
||||||
.setNumSuccessfulPartitions(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
tierResponses.add(tr);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
|
|
||||||
return new AccumulatedResponses.PartitionCounts(totalPartitionsQueriedInAllTiers,
|
|
||||||
totalSuccessfulPartitionsInAllTiers, tierResponses);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void extraSuccessfulResponseHandler(EarlybirdResponse response) {
|
|
||||||
// Record tier stats.
|
|
||||||
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
|
|
||||||
totalSuccessfulPartitionsInAllTiers += response.getNumSuccessfulPartitions();
|
|
||||||
|
|
||||||
tierResponses.add(new TierResponse()
|
|
||||||
.setNumPartitions(response.getNumPartitions())
|
|
||||||
.setNumSuccessfulPartitions(response.getNumSuccessfulPartitions())
|
|
||||||
.setTierResponseCode(EarlybirdResponseCode.SUCCESS));
|
|
||||||
}
|
|
||||||
}
|
|
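shouldEarlyTerminateMerge above lets the tier merge stop visiting older tiers once the newer tiers have already produced enough hits (or once an error was seen). A simplified sketch of that decision, assuming the merger is satisfied as soon as the accumulated result count reaches the number requested; the actual predicate lives in the EarlybirdResponseMerger subclasses.

import java.util.Arrays;
import java.util.List;

final class TierEarlyTerminationSketch {
  // Assumption: the merger is satisfied once it has numRequested results in hand.
  static boolean shouldStop(List<Integer> resultsPerTierSoFar, int numRequested) {
    int total = 0;
    for (int results : resultsPerTierSoFar) {
      total += results;
    }
    return total >= numRequested;
  }

  public static void main(String[] args) {
    // The newest tier already produced 30 of the 20 requested results, so older tiers are skipped.
    System.out.println(shouldStop(Arrays.asList(30), 20));   // true
    System.out.println(shouldStop(Arrays.asList(5, 8), 20)); // false: keep descending to older tiers
  }
}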
Binary file not shown.
@ -1,65 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
|
|
||||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
|
||||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
|
|
||||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
|
||||||
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
|
|
||||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
|
||||||
import com.twitter.util.Future;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merger class to merge toptweets EarlybirdResponse objects
|
|
||||||
*/
|
|
||||||
public class TopTweetsResponseMerger extends EarlybirdResponseMerger {
|
|
||||||
|
|
||||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
|
|
||||||
|
|
||||||
private static final SearchTimerStats TIMER =
|
|
||||||
SearchTimerStats.export("merge_top_tweets", TimeUnit.NANOSECONDS, false, true);
|
|
||||||
|
|
||||||
public TopTweetsResponseMerger(EarlybirdRequestContext requestContext,
|
|
||||||
List<Future<EarlybirdResponse>> responses,
|
|
||||||
ResponseAccumulator mode) {
|
|
||||||
super(requestContext, responses, mode);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected SearchTimerStats getMergedResponseTimer() {
|
|
||||||
return TIMER;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected double getDefaultSuccessResponseThreshold() {
|
|
||||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
|
|
||||||
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
|
|
||||||
|
|
||||||
Preconditions.checkNotNull(searchQuery);
|
|
||||||
Preconditions.checkState(searchQuery.isSetRankingMode());
|
|
||||||
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.TOPTWEETS);
|
|
||||||
|
|
||||||
int numResultsRequested = computeNumResultsToKeep();
|
|
||||||
|
|
||||||
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
|
|
||||||
|
|
||||||
addResponsesToCollector(collector);
|
|
||||||
ThriftSearchResults searchResults = collector.getAllSearchResults();
|
|
||||||
if (numResultsRequested < searchResults.getResults().size()) {
|
|
||||||
searchResults.setResults(searchResults.getResults().subList(0, numResultsRequested));
|
|
||||||
}
|
|
||||||
|
|
||||||
mergedResponse.setSearchResults(searchResults);
|
|
||||||
|
|
||||||
return mergedResponse;
|
|
||||||
}
|
|
||||||
}
|
|
Binary file not shown.
@ -1,71 +0,0 @@
|
|||||||
package com.twitter.search.earlybird_root.mergers;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tracks what situations are encountered when trimming results
|
|
||||||
*/
|
|
||||||
class TrimStats {
|
|
||||||
protected static final TrimStats EMPTY_STATS = new TrimStats();
|
|
||||||
|
|
||||||
private int maxIdFilterCount = 0;
|
|
||||||
private int minIdFilterCount = 0;
|
|
||||||
private int removedDupsCount = 0;
|
|
||||||
private int resultsTruncatedFromTailCount = 0;
|
|
||||||
|
|
||||||
int getMinIdFilterCount() {
|
|
||||||
return minIdFilterCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getRemovedDupsCount() {
|
|
||||||
return removedDupsCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getResultsTruncatedFromTailCount() {
|
|
||||||
return resultsTruncatedFromTailCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
void decreaseMaxIdFilterCount() {
|
|
||||||
maxIdFilterCount--;
|
|
||||||
}
|
|
||||||
|
|
||||||
void decreaseMinIdFilterCount() {
|
|
||||||
minIdFilterCount--;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clearMaxIdFilterCount() {
|
|
||||||
this.maxIdFilterCount = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clearMinIdFilterCount() {
|
|
||||||
this.minIdFilterCount = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void increaseMaxIdFilterCount() {
|
|
||||||
maxIdFilterCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
void increaseMinIdFilterCount() {
|
|
||||||
minIdFilterCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
void increaseRemovedDupsCount() {
|
|
||||||
removedDupsCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setResultsTruncatedFromTailCount(int resultsTruncatedFromTailCount) {
|
|
||||||
this.resultsTruncatedFromTailCount = resultsTruncatedFromTailCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder builder = new StringBuilder();
|
|
||||||
|
|
||||||
builder.append("TrimStats{");
|
|
||||||
builder.append("maxIdFilterCount=").append(maxIdFilterCount);
|
|
||||||
builder.append(", minIdFilterCount=").append(minIdFilterCount);
|
|
||||||
builder.append(", removedDupsCount=").append(removedDupsCount);
|
|
||||||
builder.append(", resultsTruncatedFromTailCount=").append(resultsTruncatedFromTailCount);
|
|
||||||
builder.append("}");
|
|
||||||
|
|
||||||
return builder.toString();
|
|
||||||
}
|
|
||||||
}
|
|
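TrimStats is just a counter bundle that a trimming pass fills in while it drops results outside a [minId, maxId] window and removes duplicates. A hypothetical trimming loop showing how the counters are meant to be updated (the loop itself is not part of this file and assumes it lives in the same package as TrimStats):

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

final class TrimLoopSketch {
  public static void main(String[] args) {
    List<Long> resultIds = List.of(40L, 25L, 25L, 90L, 3L); // hypothetical result IDs
    long minId = 10L;
    long maxId = 80L;

    TrimStats trimStats = new TrimStats();
    Set<Long> seen = new HashSet<>();
    List<Long> kept = new ArrayList<>();
    for (long id : resultIds) {
      if (id > maxId) {
        trimStats.increaseMaxIdFilterCount();   // too new for this request
      } else if (id < minId) {
        trimStats.increaseMinIdFilterCount();   // too old for this request
      } else if (!seen.add(id)) {
        trimStats.increaseRemovedDupsCount();   // duplicate across merged responses
      } else {
        kept.add(id);
      }
    }
    System.out.println(kept);      // [40, 25]
    System.out.println(trimStats); // each filter counter is 1, resultsTruncatedFromTailCount is 0
  }
}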
@ -1,15 +0,0 @@
|
|||||||
java_library(
    sources = ["*.java"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/guava",
        "3rdparty/jvm/commons-io",
        "3rdparty/jvm/org/json",
        "src/java/com/twitter/common/util:system-mocks",
        "src/java/com/twitter/search/common/dark",
        "src/java/com/twitter/search/common/metrics",
        "src/java/com/twitter/search/common/util/io/periodic",
        "src/java/com/twitter/search/common/util/json",
    ],
)
Some files were not shown because too many files have changed in this diff.