diff --git a/src/java/com/twitter/search/earlybird/search/facets/SimpleCountRankingModule.docx b/src/java/com/twitter/search/earlybird/search/facets/SimpleCountRankingModule.docx new file mode 100644 index 000000000..77abde390 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/facets/SimpleCountRankingModule.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/facets/SimpleCountRankingModule.java b/src/java/com/twitter/search/earlybird/search/facets/SimpleCountRankingModule.java deleted file mode 100644 index b5f31361a..000000000 --- a/src/java/com/twitter/search/earlybird/search/facets/SimpleCountRankingModule.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.search.earlybird.search.facets; - -import java.util.Iterator; - -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.core.earlybird.facets.FacetCountState; -import com.twitter.search.core.earlybird.facets.FacetCountState.FacetFieldResults; -import com.twitter.search.earlybird.search.EarlybirdLuceneSearcher; -import com.twitter.search.earlybird.thrift.ThriftFacetFieldResults; - -public class SimpleCountRankingModule extends FacetRankingModule { - - @Override - public void prepareResults( - EarlybirdLuceneSearcher.FacetSearchResults hits, - FacetCountState facetCountState) { - Iterator> fieldResultsIterator = - facetCountState.getFacetFieldResultsIterator(); - while (fieldResultsIterator.hasNext()) { - FacetFieldResults state = fieldResultsIterator.next(); - if (!state.isFinished()) { - Schema.FieldInfo facetField = - facetCountState.getSchema().getFacetFieldByFacetName(state.facetName); - state.results = hits.getFacetResults( - facetField.getFieldType().getFacetName(), state.numResultsRequested); - if (state.results != null) { - state.numResultsFound = state.results.getTopFacetsSize(); - } - } - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/facets/SpaceFacetCollector.docx b/src/java/com/twitter/search/earlybird/search/facets/SpaceFacetCollector.docx new file mode 100644 index 000000000..4357e9395 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/facets/SpaceFacetCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/facets/SpaceFacetCollector.java b/src/java/com/twitter/search/earlybird/search/facets/SpaceFacetCollector.java deleted file mode 100644 index 3ceeacb20..000000000 --- a/src/java/com/twitter/search/earlybird/search/facets/SpaceFacetCollector.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.twitter.search.earlybird.search.facets; - -import java.util.ArrayList; -import java.util.List; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Sets; - -import org.apache.commons.lang.StringUtils; - -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.earlybird.partition.AudioSpaceTable; -import com.twitter.search.earlybird.thrift.AudioSpaceState; -import com.twitter.search.earlybird.thrift.ThriftSearchResult; -import com.twitter.search.earlybird.thrift.ThriftSearchResultAudioSpace; - -public class SpaceFacetCollector extends AbstractFacetTermCollector { - private final List spaces = new ArrayList<>(); - - private final AudioSpaceTable audioSpaceTable; - - public SpaceFacetCollector(AudioSpaceTable audioSpaceTable) { - this.audioSpaceTable = audioSpaceTable; - } - - @Override - public boolean collect(int docID, long termID, int fieldID) { - - String spaceId = getTermFromFacet(termID, fieldID, - Sets.newHashSet(EarlybirdFieldConstant.SPACES_FACET)); - if (StringUtils.isEmpty(spaceId)) { - return false; - } - - spaces.add(new ThriftSearchResultAudioSpace(spaceId, - audioSpaceTable.isRunning(spaceId) ? AudioSpaceState.RUNNING - : AudioSpaceState.ENDED)); - - return true; - } - - @Override - public void fillResultAndClear(ThriftSearchResult result) { - getExtraMetadata(result).setSpaces(ImmutableList.copyOf(spaces)); - spaces.clear(); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsCollector.docx b/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsCollector.docx new file mode 100644 index 000000000..24c4d59d9 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsCollector.java b/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsCollector.java deleted file mode 100644 index 5856d5281..000000000 --- a/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsCollector.java +++ /dev/null @@ -1,487 +0,0 @@ -package com.twitter.search.earlybird.search.facets; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -import org.apache.commons.lang.StringUtils; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchResultsStats; -import com.twitter.search.common.schema.SchemaUtil; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.common.search.EarlyTerminationState; -import com.twitter.search.common.util.earlybird.TermStatisticsUtil; -import com.twitter.search.core.earlybird.index.TimeMapper; -import com.twitter.search.earlybird.index.EarlybirdSingleSegmentSearcher; -import com.twitter.search.earlybird.search.AbstractResultsCollector; -import com.twitter.search.earlybird.search.SearchResultsInfo; -import com.twitter.search.earlybird.stats.EarlybirdSearcherStats; -import com.twitter.search.earlybird.thrift.ThriftHistogramSettings; -import com.twitter.search.earlybird.thrift.ThriftTermRequest; -import com.twitter.search.earlybird.thrift.ThriftTermResults; - -public class TermStatisticsCollector extends AbstractResultsCollector - { - private static final EarlyTerminationState TERMINATED_TERM_STATS_COUNTING_DONE = - new EarlyTerminationState("terminated_term_stats_counting_done", true); - - // Stats for tracking histogram results. - private static final SearchResultsStats TERM_STATS_HISTOGRAM_REQUESTS_WITH_MOVED_BACK_BINS = - SearchResultsStats.export("term_statistics_collector_queries_with_moved_back_bins"); - private static final SearchCounter TERM_STATS_SKIPPED_LARGER_OUT_OF_BOUNDS_HITS = - SearchCounter.export("term_statistics_collector_skipped_larger_out_of_bounds_hits"); - - @VisibleForTesting - static final class TermStatistics { - private final ThriftTermRequest termRequest; - private final Term term; // could be null, for count across all fields - private int termDF = 0; - private int termCount = 0; - private final int[] histogramBins; - - // Per-segment information. - private PostingsEnum segmentDocsEnum; // could be null, for count across all fields - private boolean segmentDone; - - @VisibleForTesting - TermStatistics(ThriftTermRequest termRequest, Term term, int numBins) { - this.termRequest = termRequest; - this.term = term; - this.histogramBins = new int[numBins]; - } - - /** - * Take the currently accumulated counts and "move them back" to make room for counts from more - * recent binIds. - * - * For example, if the oldFirstBinID was set to 10, and the histogramBins were {3, 4, 5, 6, 7}, - * after this call with newFirstBinID set to 12, the histogramBins will be set - * to {5, 6, 7, 0, 0}. - * - * @param oldFirstBinID the binId of the firstBin that's been used up to now. - * @param newFirstBinID the new binId of the firstBin that will be used from now on. - * The newFirstBinID is presumed to be larger than the oldFirstBinID, and is asserted. - */ - @VisibleForTesting - void moveBackTermCounts(int oldFirstBinID, int newFirstBinID) { - Preconditions.checkState(oldFirstBinID < newFirstBinID); - // move counts back by this many bins - final int moveBackBy = newFirstBinID - oldFirstBinID; - - this.termCount = 0; - for (int i = 0; i < histogramBins.length; i++) { - int oldCount = histogramBins[i]; - histogramBins[i] = 0; - int newIndex = i - moveBackBy; - if (newIndex >= 0) { - histogramBins[newIndex] = oldCount; - this.termCount += oldCount; - } - } - } - - @VisibleForTesting void countHit(int bin) { - termCount++; - histogramBins[bin]++; - } - - @VisibleForTesting int getTermCount() { - return termCount; - } - - @VisibleForTesting int[] getHistogramBins() { - return histogramBins; - } - } - - private TermStatistics[] termStatistics; - - // Histogram fields. - private int numBins; - private int binSize; - - private int numTimesBinsWereMovedBack = 0; - private int numLargerOutOfBoundsBinsSkipped = 0; - - private static final int SEEN_OUT_OF_RANGE_THRESHOLD = 10; - - private int seenOutOfRange = 0; - - // ID of the first bin - effectively time / binSize. This is calculated - // relative to the first collected in-order hit. - private int firstBinID = -1; - // List of per-segment debug information specifically useful for termstat request debugging. - private List termStatisticsDebugInfo = new ArrayList<>(); - - /** - * Creates a new term stats collector. - */ - public TermStatisticsCollector( - ImmutableSchemaInterface schema, - TermStatisticsRequestInfo searchRequestInfo, - EarlybirdSearcherStats searcherStats, - Clock clock, - int requestDebugMode) { - super(schema, searchRequestInfo, clock, searcherStats, requestDebugMode); - - // Set up the histogram bins. - if (searchRequestInfo.isReturnHistogram()) { - ThriftHistogramSettings histogramSettings = searchRequestInfo.getHistogramSettings(); - this.numBins = histogramSettings.getNumBins(); - binSize = TermStatisticsUtil.determineBinSize(histogramSettings); - } else { - this.numBins = 0; - this.binSize = 0; - } - - // Set up the term statistics array. - List termRequests = searchRequestInfo.getTermRequests(); - if (termRequests == null) { - this.termStatistics = new TermStatistics[0]; - return; - } - - this.termStatistics = new TermStatistics[searchRequestInfo.getTermRequests().size()]; - for (int i = 0; i < searchRequestInfo.getTermRequests().size(); i++) { - final ThriftTermRequest termRequest = searchRequestInfo.getTermRequests().get(i); - - Term term = null; - String fieldName = termRequest.getFieldName(); - if (!StringUtils.isBlank(fieldName)) { - // First check if it's a facet field. - Schema.FieldInfo facetField = schema.getFacetFieldByFacetName(termRequest.getFieldName()); - if (facetField != null) { - term = new Term(facetField.getName(), termRequest.getTerm()); - } else { - // EarlybirdSearcher.validateRequest() should've already checked that the field exists in - // the schema, and that the term can be converted to the type of this field. However, if - // that did not happen for some reason, an exception will be thrown here, which will be - // converted to a TRANSIENT_ERROR response code. - Schema.FieldInfo fieldInfo = schema.getFieldInfo(fieldName); - Preconditions.checkNotNull( - fieldInfo, - "Found a ThriftTermRequest for a field that's not in the schema: " + fieldName - + ". This should've been caught by EarlybirdSearcher.validateRequest()!"); - term = new Term(fieldName, SchemaUtil.toBytesRef(fieldInfo, termRequest.getTerm())); - } - } else { - // NOTE: if the fieldName is empty, this is a catch-all term request for the count across - // all fields. We'll just use a null term in the TermStatistics object. - } - - termStatistics[i] = new TermStatistics(termRequest, term, numBins); - } - } - - @Override - public void startSegment() throws IOException { - termStatisticsDebugInfo.add( - "Starting segment in timestamp range: [" + timeMapper.getFirstTime() - + ", " + timeMapper.getLastTime() + "]"); - for (TermStatistics termStats : termStatistics) { - termStats.segmentDone = true; // until we know it's false later. - TermsEnum termsEnum = null; - if (termStats.term != null) { - Terms terms = currTwitterReader.terms(termStats.term.field()); - if (terms != null) { - termsEnum = terms.iterator(); - if (termsEnum != null && termsEnum.seekExact(termStats.term.bytes())) { - termStats.termDF += termsEnum.docFreq(); // Only meaningful for matchAll queries. - termStats.segmentDocsEnum = - termsEnum.postings(termStats.segmentDocsEnum, PostingsEnum.FREQS); - termStats.segmentDone = termStats.segmentDocsEnum == null - || termStats.segmentDocsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; - } else { - // this term doesn't exist in this segment. - } - } - } else { - // Catch-all case - termStats.termDF += currTwitterReader.numDocs(); // Only meaningful for matchAll queries. - termStats.segmentDocsEnum = null; - termStats.segmentDone = false; - } - } - } - - private int calculateBin(final int tweetTime) { - if (tweetTime == TimeMapper.ILLEGAL_TIME) { - return -1; - } - - final int binID = Math.abs(tweetTime) / binSize; - final int expectedFirstBinId = binID - numBins + 1; - - if (firstBinID == -1) { - firstBinID = expectedFirstBinId; - } else if (expectedFirstBinId > firstBinID) { - numTimesBinsWereMovedBack++; - final int oldOutOfOrderFirstBinID = firstBinID; - firstBinID = expectedFirstBinId; - // We got a more recent out of order bin, move previous counts back. - for (TermStatistics ts : termStatistics) { - ts.moveBackTermCounts(oldOutOfOrderFirstBinID, firstBinID); - } - } - - final int binIndex = binID - firstBinID; - if (binIndex >= numBins) { - // In-order times should be decreasing, - // and out of order times seen after an in-order tweet should also be smaller than the - // first in-order tweet's time. Will track these and export as a stat. - numLargerOutOfBoundsBinsSkipped++; - return -1; - } else if (binIndex < 0) { - // Early termination criteria. - seenOutOfRange++; - } else { - // Reset the counter, since we want to see consecutive tweets that are out of our bin range - // not single anomalies. - seenOutOfRange = 0; - } - - return binIndex; - } - - @Override - public void doCollect(long tweetID) throws IOException { - if (searchRequestInfo.isReturnHistogram()) { - final int tweetTime = timeMapper.getTime(curDocId); - final int binIndex = calculateBin(tweetTime); - if (binIndex >= 0) { - for (TermStatistics ts : termStatistics) { - if (!ts.segmentDone) { - countHist(ts, binIndex); - } - } - } - } else { - for (TermStatistics ts : termStatistics) { - if (!ts.segmentDone) { - countNoHist(ts); - } - } - } - } - - @Override - public void skipSegment(EarlybirdSingleSegmentSearcher searcher) { - // Do nothing here. - // We don't do accounting that's done in AbstractResultsCollector for Term Stats - // requests because otherwise the bin ID calculation will be confused. - } - - private boolean advance(TermStatistics ts) throws IOException { - PostingsEnum docsEnum = ts.segmentDocsEnum; - if (docsEnum.docID() < curDocId) { - if (docsEnum.advance(curDocId) == DocIdSetIterator.NO_MORE_DOCS) { - ts.segmentDone = true; - return false; - } - } - return docsEnum.docID() == curDocId; - } - - private boolean countHist(TermStatistics ts, int bin) throws IOException { - if (ts.term != null && !advance(ts)) { - return false; - } - ts.countHit(bin); - return true; - } - - private boolean countNoHist(TermStatistics ts) throws IOException { - if (ts.term != null && !advance(ts)) { - return false; - } - ts.termCount++; - return true; - } - - @Override - public EarlyTerminationState innerShouldCollectMore() { - if (readyToTerminate()) { - return setEarlyTerminationState(TERMINATED_TERM_STATS_COUNTING_DONE); - } - return EarlyTerminationState.COLLECTING; - } - - /** - * The termination logic is simple - we know what our earliest bin is and once we see a result - * that's before our earliest bin, we terminate. - * - * Our results come with increasing internal doc ids, which should correspond to decreasing - * timestamps. See SEARCH-27729, TWEETYPIE-7031. - * - * We early terminate after we have seen enough tweets that are outside of the bin - * range that we want to return. This way we're not terminating too early because of single tweets - * with wrong timestamps. - */ - @VisibleForTesting - boolean readyToTerminate() { - return this.seenOutOfRange >= SEEN_OUT_OF_RANGE_THRESHOLD; - } - - @Override - public TermStatisticsSearchResults doGetResults() { - return new TermStatisticsSearchResults(); - } - - public final class TermStatisticsSearchResults extends SearchResultsInfo { - public final List binIds; - public final Map results; - public final int lastCompleteBinId; - public final List termStatisticsDebugInfo; - - private TermStatisticsSearchResults() { - // Initialize term stat debug info - termStatisticsDebugInfo = TermStatisticsCollector.this.termStatisticsDebugInfo; - - if (termStatistics.length > 0) { - results = new HashMap<>(); - - if (searchRequestInfo.isReturnHistogram()) { - binIds = new ArrayList<>(numBins); - int minSearchedTime = TermStatisticsCollector.this.getMinSearchedTime(); - - if (shouldCollectDetailedDebugInfo()) { - termStatisticsDebugInfo.add("minSearchedTime: " + minSearchedTime); - int maxSearchedTime = TermStatisticsCollector.this.getMaxSearchedTime(); - termStatisticsDebugInfo.add("maxSearchedTime: " + maxSearchedTime); - } - - int lastCompleteBin = -1; - - computeFirstBinId(TermStatisticsCollector.this.isSetMinSearchedTime(), minSearchedTime); - trackHistogramResultStats(); - - // Example: - // minSearchTime = 53s - // binSize = 10 - // firstBinId = 5 - // numBins = 4 - // binId = 5, 6, 7, 8 - // binTimeStamp = 50s, 60s, 70s, 80s - for (int i = 0; i < numBins; i++) { - int binId = firstBinID + i; - int binTimeStamp = binId * binSize; - binIds.add(binId); - if (lastCompleteBin == -1 && binTimeStamp > minSearchedTime) { - lastCompleteBin = binId; - } - } - - if (!getEarlyTerminationState().isTerminated()) { - // only if we didn't early terminate we can be sure to use the firstBinID as - // lastCompleteBinId - lastCompleteBinId = firstBinID; - if (shouldCollectDetailedDebugInfo()) { - termStatisticsDebugInfo.add("no early termination"); - } - } else { - lastCompleteBinId = lastCompleteBin; - if (shouldCollectDetailedDebugInfo()) { - termStatisticsDebugInfo.add( - "early terminated for reason: " + getEarlyTerminationReason()); - } - } - if (shouldCollectDetailedDebugInfo()) { - termStatisticsDebugInfo.add("lastCompleteBinId: " + lastCompleteBinId); - } - } else { - binIds = null; - lastCompleteBinId = -1; - } - - for (TermStatistics ts : termStatistics) { - ThriftTermResults termResults = new ThriftTermResults().setTotalCount(ts.termCount); - - if (searchRequestInfo.isReturnHistogram()) { - List list = new ArrayList<>(); - for (int count : ts.histogramBins) { - list.add(count); - } - termResults.setHistogramBins(list); - } - - results.put(ts.termRequest, termResults); - } - } else { - binIds = null; - results = null; - lastCompleteBinId = -1; - } - } - - @Override - public String toString() { - StringBuilder res = new StringBuilder(); - res.append("TermStatisticsSearchResults(\n"); - if (binIds != null) { - res.append(" binIds=").append(binIds).append("\n"); - } - res.append(" lastCompleteBinId=").append(lastCompleteBinId).append("\n"); - if (results != null) { - res.append(" results=").append(results).append("\n"); - } - res.append(")"); - return res.toString(); - } - - public List getTermStatisticsDebugInfo() { - return termStatisticsDebugInfo; - } - } - - /** - * Figure out what the actual firstBinId is for this query. - */ - private void computeFirstBinId(boolean isSetMinSearchedTime, int minSearchedTime) { - if (firstBinID == -1) { - if (!isSetMinSearchedTime) { - // This would only happen if we don't search any segments, which for now we have - // only seen happening if since_time or until_time don't intersect at all with - // the range of the served segments. - firstBinID = 0; - } else { - // Example: - // minSearchedTime = 54 - // binSize = 10 - // firstBinId = 5 - firstBinID = minSearchedTime / binSize; - } - - if (shouldCollectDetailedDebugInfo()) { - termStatisticsDebugInfo.add("firstBinId: " + firstBinID); - } - } - } - - @VisibleForTesting - int getSeenOutOfRange() { - return seenOutOfRange; - } - - private void trackHistogramResultStats() { - if (numLargerOutOfBoundsBinsSkipped > 0) { - TERM_STATS_SKIPPED_LARGER_OUT_OF_BOUNDS_HITS.increment(); - } - - if (numTimesBinsWereMovedBack > 0) { - TERM_STATS_HISTOGRAM_REQUESTS_WITH_MOVED_BACK_BINS.recordResults(numTimesBinsWereMovedBack); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsRequestInfo.docx b/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsRequestInfo.docx new file mode 100644 index 000000000..216677e4f Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsRequestInfo.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsRequestInfo.java b/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsRequestInfo.java deleted file mode 100644 index 6162f4192..000000000 --- a/src/java/com/twitter/search/earlybird/search/facets/TermStatisticsRequestInfo.java +++ /dev/null @@ -1,94 +0,0 @@ -package com.twitter.search.earlybird.search.facets; - -import java.util.LinkedList; -import java.util.List; -import java.util.Set; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; - -import org.apache.lucene.search.Query; - -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.common.search.TerminationTracker; -import com.twitter.search.common.util.text.NormalizerHelper; -import com.twitter.search.common.util.url.URLUtils; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.search.SearchRequestInfo; -import com.twitter.search.earlybird.thrift.ThriftHistogramSettings; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftTermRequest; -import com.twitter.search.earlybird.thrift.ThriftTermStatisticsRequest; - -public class TermStatisticsRequestInfo extends SearchRequestInfo { - private static final Set FACET_URL_FIELDS_TO_NORMALIZE = new ImmutableSet.Builder() - .add(EarlybirdFieldConstant.IMAGES_FACET) - .add(EarlybirdFieldConstant.VIDEOS_FACET) - .add(EarlybirdFieldConstant.NEWS_FACET) - .build(); - - protected final List termRequests; - protected final ThriftHistogramSettings histogramSettings; - - /** - * Creates a new TermStatisticsRequestInfo instance using the provided query. - */ - public TermStatisticsRequestInfo(ThriftSearchQuery searchQuery, - Query luceneQuery, - ThriftTermStatisticsRequest termStatsRequest, - TerminationTracker terminationTracker) { - super(searchQuery, luceneQuery, terminationTracker); - this.termRequests = termStatsRequest.isSetTermRequests() - ? termStatsRequest.getTermRequests() : new LinkedList<>(); - this.histogramSettings = termStatsRequest.getHistogramSettings(); - if (termStatsRequest.isIncludeGlobalCounts()) { - // Add an empty request to indicate we need a global count across all fields. - termRequests.add(new ThriftTermRequest().setFieldName("").setTerm("")); - } - - // We only normalize TEXT terms and urls. All other terms, e.g. topics (named entities) are - // not normalized. Here the assumption is that the caller passes the exact terms back that - // the facet API returned - for (ThriftTermRequest termReq : termRequests) { - if (termReq.getTerm().isEmpty()) { - continue; // the special catch-all term. - } - - if (!termReq.isSetFieldName() - || termReq.getFieldName().equals(EarlybirdFieldConstant.TEXT_FIELD.getFieldName())) { - // normalize the TEXT term as it's normalized during ingestion - termReq.setTerm(NormalizerHelper.normalizeWithUnknownLocale( - termReq.getTerm(), EarlybirdConfig.getPenguinVersion())); - } else if (FACET_URL_FIELDS_TO_NORMALIZE.contains(termReq.getFieldName())) { - // remove the trailing slash from the URL path. This operation is idempotent, - // so either a spiderduck URL or a facet URL can be used here. The latter would just - // be normalized twice, which is fine. - termReq.setTerm(URLUtils.normalizePath(termReq.getTerm())); - } - } - } - - @Override - protected int calculateMaxHitsToProcess(ThriftSearchQuery searchQuery) { - Preconditions.checkNotNull(searchQuery.getCollectorParams()); - if (!searchQuery.getCollectorParams().isSetTerminationParams() - || !searchQuery.getCollectorParams().getTerminationParams().isSetMaxHitsToProcess()) { - // Override the default value to all hits. - return Integer.MAX_VALUE; - } else { - return super.calculateMaxHitsToProcess(searchQuery); - } - } - - public final List getTermRequests() { - return this.termRequests; - } - - public final ThriftHistogramSettings getHistogramSettings() { - return this.histogramSettings; - } - - public final boolean isReturnHistogram() { - return this.histogramSettings != null; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/facets/TweetSearchFacetCountIteratorFactory.docx b/src/java/com/twitter/search/earlybird/search/facets/TweetSearchFacetCountIteratorFactory.docx new file mode 100644 index 000000000..5ed9e1c9a Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/facets/TweetSearchFacetCountIteratorFactory.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/facets/TweetSearchFacetCountIteratorFactory.java b/src/java/com/twitter/search/earlybird/search/facets/TweetSearchFacetCountIteratorFactory.java deleted file mode 100644 index a46149fc4..000000000 --- a/src/java/com/twitter/search/earlybird/search/facets/TweetSearchFacetCountIteratorFactory.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.search.earlybird.search.facets; - -import java.io.IOException; - -import com.google.common.base.Preconditions; - -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.facets.CSFFacetCountIterator; -import com.twitter.search.core.earlybird.facets.FacetCountIterator; -import com.twitter.search.core.earlybird.facets.FacetCountIteratorFactory; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; - -/** - * Factory of {@link FacetCountIterator} instances for tweet search. - * It provides a special iterator for the retweets facet. - */ -public final class TweetSearchFacetCountIteratorFactory extends FacetCountIteratorFactory { - public static final TweetSearchFacetCountIteratorFactory FACTORY = - new TweetSearchFacetCountIteratorFactory(); - - private TweetSearchFacetCountIteratorFactory() { - } - - @Override - public FacetCountIterator getFacetCountIterator( - EarlybirdIndexSegmentAtomicReader reader, - Schema.FieldInfo fieldInfo) throws IOException { - Preconditions.checkNotNull(reader); - Preconditions.checkNotNull(fieldInfo); - Preconditions.checkArgument(fieldInfo.getFieldType().isUseCSFForFacetCounting()); - - String facetName = fieldInfo.getFieldType().getFacetName(); - - if (EarlybirdFieldConstant.RETWEETS_FACET.equals(facetName)) { - return new RetweetFacetCountIterator(reader, fieldInfo); - } else { - return new CSFFacetCountIterator(reader, fieldInfo); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/BadUserRepFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/BadUserRepFilter.docx new file mode 100644 index 000000000..7119afdb3 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/BadUserRepFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/BadUserRepFilter.java b/src/java/com/twitter/search/earlybird/search/queries/BadUserRepFilter.java deleted file mode 100644 index 3577b8635..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/BadUserRepFilter.java +++ /dev/null @@ -1,115 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; - -public final class BadUserRepFilter extends Query { - /** - * Creates a query that filters out results coming from users with bad reputation. - * - * @param minTweepCred The lowest acceptable user reputation. - * @return A query that filters out results from bad reputation users. - */ - public static Query getBadUserRepFilter(int minTweepCred) { - if (minTweepCred <= 0) { - return null; - } - - return new BooleanQuery.Builder() - .add(new BadUserRepFilter(minTweepCred), BooleanClause.Occur.FILTER) - .build(); - } - - private final int minTweepCred; - - private BadUserRepFilter(int minTweepCred) { - this.minTweepCred = minTweepCred; - } - - @Override - public int hashCode() { - return minTweepCred; - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof BadUserRepFilter)) { - return false; - } - - return minTweepCred == BadUserRepFilter.class.cast(obj).minTweepCred; - } - - @Override - public String toString(String field) { - return "BadUserRepFilter:" + minTweepCred; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - if (!(reader instanceof EarlybirdIndexSegmentAtomicReader)) { - return new AllDocsIterator(reader); - } - - return new BadUserExcludeDocIdSetIterator( - (EarlybirdIndexSegmentAtomicReader) context.reader(), minTweepCred); - } - }; - } - - private static final class BadUserExcludeDocIdSetIterator extends RangeFilterDISI { - private final NumericDocValues userReputationDocValues; - private final int minTweepCred; - - BadUserExcludeDocIdSetIterator(EarlybirdIndexSegmentAtomicReader indexReader, - int minTweepCred) throws IOException { - super(indexReader); - this.userReputationDocValues = - indexReader.getNumericDocValues(EarlybirdFieldConstant.USER_REPUTATION.getFieldName()); - this.minTweepCred = minTweepCred; - } - - @Override - public boolean shouldReturnDoc() throws IOException { - // We need this explicit casting to byte, because of how we encode and decode features in our - // encoded_tweet_features field. If a feature is an int (uses all 32 bits of the int), then - // encoding the feature and then decoding it preserves its original value. However, if the - // feature does not use the entire int (and especially if it uses bits somewhere in the middle - // of the int), then the feature value is assumed to be unsigned when it goes through this - // process of encoding and decoding. So a user rep of - // RelevanceSignalConstants.UNSET_REPUTATION_SENTINEL (-128) will be correctly encoded as the - // binary value 10000000, but will be treated as an unsigned value when decoded, and therefore - // the decoded value will be 128. - // - // In retrospect, this seems like a really poor design decision. It seems like it would be - // better if all feature values were considered to be signed, even if most features can never - // have negative values. Unfortunately, making this change is not easy, because some features - // store normalized values, so we would also need to change the range of allowed values - // produced by those normalizers, as well as all code that depends on those values. - // - // So for now, just cast this value to a byte, to get the proper negative value. - return userReputationDocValues.advanceExact(docID()) - && ((byte) userReputationDocValues.longValue() >= minTweepCred); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/CSFDisjunctionFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/CSFDisjunctionFilter.docx new file mode 100644 index 000000000..15d2c2ae1 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/CSFDisjunctionFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/CSFDisjunctionFilter.java b/src/java/com/twitter/search/earlybird/search/queries/CSFDisjunctionFilter.java deleted file mode 100644 index f5ba12493..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/CSFDisjunctionFilter.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Objects; -import java.util.Set; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; - -/** - * CSFDisjunctionFilter provides an efficient mechanism to query for documents that have a - * long CSF equal to one of the provided values. - */ -public final class CSFDisjunctionFilter extends Query { - private final String csfField; - private final Set values; - - public static Query getCSFDisjunctionFilter(String csfField, Set values) { - return new BooleanQuery.Builder() - .add(new CSFDisjunctionFilter(csfField, values), BooleanClause.Occur.FILTER) - .build(); - } - - private CSFDisjunctionFilter(String csfField, Set values) { - this.csfField = csfField; - this.values = values; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - return new CSFDisjunctionFilterDISI(context.reader(), csfField, values); - } - }; - } - - @Override - public int hashCode() { - return (csfField == null ? 0 : csfField.hashCode()) * 17 - + (values == null ? 0 : values.hashCode()); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof CSFDisjunctionFilter)) { - return false; - } - - CSFDisjunctionFilter filter = CSFDisjunctionFilter.class.cast(obj); - return Objects.equals(csfField, filter.csfField) && Objects.equals(values, filter.values); - } - - @Override - public String toString(String field) { - return "CSFDisjunctionFilter:" + csfField + ",count:" + values.size(); - } - - private static final class CSFDisjunctionFilterDISI extends RangeFilterDISI { - private final NumericDocValues docValues; - private final Set values; - - private CSFDisjunctionFilterDISI(LeafReader reader, String csfField, Set values) - throws IOException { - super(reader); - this.values = values; - this.docValues = reader.getNumericDocValues(csfField); - } - - @Override - protected boolean shouldReturnDoc() throws IOException { - return docValues.advanceExact(docID()) && values.contains(docValues.longValue()); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/DocValRangeFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/DocValRangeFilter.docx new file mode 100644 index 000000000..293a7517b Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/DocValRangeFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/DocValRangeFilter.java b/src/java/com/twitter/search/earlybird/search/queries/DocValRangeFilter.java deleted file mode 100644 index b9b5ad68f..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/DocValRangeFilter.java +++ /dev/null @@ -1,195 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Objects; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.common.schema.thriftjava.ThriftCSFType; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; - -/** - * Filters tweets according to the specified CSF field value. - * Note that min value is inclusive, and max value is exclusive. - */ -public final class DocValRangeFilter extends Query { - private final String csfField; - private final ThriftCSFType csfFieldType; - private final Number minValInclusive; - private final Number maxValExclusive; - - /** - * Returns a query that filters hits based on the value of a CSF. - * - * @param csfField The CSF name. - * @param csfFieldType The CSF type. - * @param minVal The minimum acceptable value (inclusive). - * @param maxVal The maximum acceptable value (exclusive). - * @return A query that filters hits based on the value of a CSF. - */ - public static Query getDocValRangeQuery(String csfField, ThriftCSFType csfFieldType, - double minVal, double maxVal) { - return new BooleanQuery.Builder() - .add(new DocValRangeFilter(csfField, csfFieldType, minVal, maxVal), - BooleanClause.Occur.FILTER) - .build(); - } - - /** - * Returns a query that filters hits based on the value of a CSF. - * - * @param csfField The CSF name. - * @param csfFieldType The CSF type. - * @param minVal The minimum acceptable value (inclusive). - * @param maxVal The maximum acceptable value (exclusive). - * @return A query that filters hits based on the value of a CSF. - */ - public static Query getDocValRangeQuery(String csfField, ThriftCSFType csfFieldType, - long minVal, long maxVal) { - return new BooleanQuery.Builder() - .add(new DocValRangeFilter(csfField, csfFieldType, minVal, maxVal), - BooleanClause.Occur.FILTER) - .build(); - } - - private DocValRangeFilter(String csfField, ThriftCSFType csfFieldType, - double minVal, double maxVal) { - this.csfField = csfField; - this.csfFieldType = csfFieldType; - this.minValInclusive = new Float(minVal); - this.maxValExclusive = new Float(maxVal); - } - - private DocValRangeFilter(String csfField, ThriftCSFType csfFieldType, - long minVal, long maxVal) { - this.csfField = csfField; - this.csfFieldType = csfFieldType; - this.minValInclusive = new Long(minVal); - this.maxValExclusive = new Long(maxVal); - } - - @Override - public int hashCode() { - return (csfField == null ? 0 : csfField.hashCode()) * 29 - + (csfFieldType == null ? 0 : csfFieldType.hashCode()) * 17 - + minValInclusive.hashCode() * 7 - + maxValExclusive.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof DocValRangeFilter)) { - return false; - } - - DocValRangeFilter filter = DocValRangeFilter.class.cast(obj); - return Objects.equals(csfField, filter.csfField) - && (csfFieldType == filter.csfFieldType) - && minValInclusive.equals(filter.minValInclusive) - && maxValExclusive.equals(filter.maxValExclusive); - } - - @Override - public String toString(String field) { - return "DocValRangeFilter:" + csfField - + ",type:" + csfFieldType.toString() - + ",min:" + this.minValInclusive.toString() - + ",max:" + this.maxValExclusive.toString(); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - if (csfFieldType == null) { - return new AllDocsIterator(reader); - } - - int smallestDoc = (reader instanceof EarlybirdIndexSegmentAtomicReader) - ? ((EarlybirdIndexSegmentAtomicReader) reader).getSmallestDocID() : 0; - int largestDoc = reader.maxDoc() - 1; - return new CSFRangeDocIdSetIterator(reader, csfField, csfFieldType, - smallestDoc, largestDoc, - minValInclusive, maxValExclusive); - } - }; - } - - private static final class CSFRangeDocIdSetIterator extends RangeFilterDISI { - private final NumericDocValues numericDocValues; - private final ThriftCSFType csfType; - private final Number minValInclusive; - private final Number maxValExclusive; - - public CSFRangeDocIdSetIterator(LeafReader reader, - String csfField, - ThriftCSFType csfType, - int smallestDocID, - int largestDocID, - Number minValInclusive, - Number maxValExclusive) throws IOException { - super(reader, smallestDocID, largestDocID); - this.numericDocValues = reader.getNumericDocValues(csfField); - this.csfType = csfType; - this.minValInclusive = minValInclusive; - this.maxValExclusive = maxValExclusive; - } - - @Override - protected boolean shouldReturnDoc() throws IOException { - if (!numericDocValues.advanceExact(docID())) { - return false; - } - - long val = numericDocValues.longValue(); - switch (csfType) { - case DOUBLE: - double doubleVal = Double.longBitsToDouble(val); - return doubleVal >= minValInclusive.doubleValue() - && doubleVal < maxValExclusive.doubleValue(); - case FLOAT: - float floatVal = Float.intBitsToFloat((int) val); - return floatVal >= minValInclusive.doubleValue() - && floatVal < maxValExclusive.doubleValue(); - case LONG: - return val >= minValInclusive.longValue() && val < maxValExclusive.longValue(); - case INT: - return val >= minValInclusive.longValue() && (int) val < maxValExclusive.longValue(); - case BYTE: - return (byte) val >= minValInclusive.longValue() - && (byte) val < maxValExclusive.longValue(); - default: - return false; - } - } - } - - ////////////////////////// - // for unit tests only - ////////////////////////// - @VisibleForTesting - public Number getMinValForTest() { - return minValInclusive; - } - - @VisibleForTesting - public Number getMaxValForTest() { - return maxValExclusive; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/FeatureValueInAcceptListOrUnsetFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/FeatureValueInAcceptListOrUnsetFilter.docx new file mode 100644 index 000000000..9da9c877c Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/FeatureValueInAcceptListOrUnsetFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/FeatureValueInAcceptListOrUnsetFilter.java b/src/java/com/twitter/search/earlybird/search/queries/FeatureValueInAcceptListOrUnsetFilter.java deleted file mode 100644 index e4e9d37a7..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/FeatureValueInAcceptListOrUnsetFilter.java +++ /dev/null @@ -1,113 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Set; - -import com.google.common.base.Preconditions; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; - -public final class FeatureValueInAcceptListOrUnsetFilter extends Query { - - private final String featureName; - private final Set idsAcceptList; - - /** - * Creates a query that filters for hits that have the given feature unset, or that have the - * given feature set to a value in the given list of IDs. - * - * @param featureName The feature. - * @param ids A list of id values this filter will accept for the given feature. - * @return A query that filters out all hits that have the given feature set. - */ - public static Query getFeatureValueInAcceptListOrUnsetFilter(String featureName, Set ids) { - return new BooleanQuery.Builder() - .add(new FeatureValueInAcceptListOrUnsetFilter(featureName, ids), - BooleanClause.Occur.FILTER) - .build(); - } - - @Override - public String toString(String s) { - return String.format("FeatureValueInAcceptListOrUnsetFilter(%s, AcceptList = (%s))", - featureName, - idsAcceptList); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof FeatureValueInAcceptListOrUnsetFilter)) { - return false; - } - - FeatureValueInAcceptListOrUnsetFilter filter = - FeatureValueInAcceptListOrUnsetFilter.class.cast(obj); - return featureName.equals(filter.featureName) && idsAcceptList.equals(filter.idsAcceptList); - } - - @Override - public int hashCode() { - return featureName.hashCode() * 7 + idsAcceptList.hashCode(); - } - - private FeatureValueInAcceptListOrUnsetFilter(String featureName, Set ids) { - this.featureName = Preconditions.checkNotNull(featureName); - this.idsAcceptList = Preconditions.checkNotNull(ids); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - return new FeatureValueInAcceptListOrUnsetDocIdSetIterator( - context.reader(), featureName, idsAcceptList); - } - }; - } - - private static final class FeatureValueInAcceptListOrUnsetDocIdSetIterator - extends RangeFilterDISI { - private final NumericDocValues featureDocValues; - private final Set idsAcceptList; - - FeatureValueInAcceptListOrUnsetDocIdSetIterator( - LeafReader indexReader, String featureName, Set ids) throws IOException { - super(indexReader); - this.featureDocValues = indexReader.getNumericDocValues(featureName); - this.idsAcceptList = ids; - } - - @Override - public boolean shouldReturnDoc() throws IOException { - // If featureDocValues is null, that means there were no documents indexed with the given - // field in the current segment. - // - // The advanceExact() method returns false if it cannot find the given docId in the - // NumericDocValues instance. So if advanceExact() returns false then we know the feature is - // unset. - // However, for realtime Earlybirds we have a custom implementation of NumericDocValues, - // ColumnStrideFieldDocValues, which will contain an entry for every indexed docId and use a - // value of 0 to indicate that a feature is unset. - // - // So to check if a feature is unset for a given docId, we first need to check if we can find - // the docId, and then we additionally need to check if the feature value is 0. - return featureDocValues == null - || !featureDocValues.advanceExact(docID()) - || featureDocValues.longValue() == 0 - || idsAcceptList.contains(featureDocValues.longValue()); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/GeoTwoPhaseQuery.docx b/src/java/com/twitter/search/earlybird/search/queries/GeoTwoPhaseQuery.docx new file mode 100644 index 000000000..62b7db4b2 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/GeoTwoPhaseQuery.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/GeoTwoPhaseQuery.java b/src/java/com/twitter/search/earlybird/search/queries/GeoTwoPhaseQuery.java deleted file mode 100644 index cfae5f988..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/GeoTwoPhaseQuery.java +++ /dev/null @@ -1,255 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Set; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.search.TerminationTracker; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; - - -public class GeoTwoPhaseQuery extends Query { - private static final boolean ENABLE_GEO_EARLY_TERMINATION = - EarlybirdConfig.getBool("early_terminate_geo_searches", true); - - private static final int GEO_TIMEOUT_OVERRIDE = - EarlybirdConfig.getInt("early_terminate_geo_searches_timeout_override", -1); - - // How many geo searches are early terminated due to timeout. - private static final SearchCounter GEO_SEARCH_TIMEOUT_COUNT = - SearchCounter.export("geo_search_timeout_count"); - - private final SecondPhaseDocAccepter accepter; - private final TerminationTracker terminationTracker; - private final ConstantScoreQuery query; - - public GeoTwoPhaseQuery( - Query query, SecondPhaseDocAccepter accepter, TerminationTracker terminationTracker) { - this.accepter = accepter; - this.terminationTracker = terminationTracker; - - this.query = new ConstantScoreQuery(query); - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - Query rewritten = query.getQuery().rewrite(reader); - if (rewritten != query.getQuery()) { - return new GeoTwoPhaseQuery(rewritten, accepter, terminationTracker); - } - - return this; - } - - @Override - public int hashCode() { - return query.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof GeoTwoPhaseQuery)) { - return false; - } - GeoTwoPhaseQuery that = (GeoTwoPhaseQuery) obj; - return query.equals(that.query) - && accepter.equals(that.accepter) - && terminationTracker.equals(that.terminationTracker); - } - - @Override - public String toString(String field) { - return new StringBuilder("GeoTwoPhaseQuery(") - .append("Accepter(") - .append(accepter.toString()) - .append(") Geohashes(") - .append(query.getQuery().toString(field)) - .append("))") - .toString(); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - Weight innerWeight = query.createWeight(searcher, scoreMode, boost); - return new GeoTwoPhaseWeight(this, innerWeight, accepter, terminationTracker); - } - - private static final class GeoTwoPhaseWeight extends Weight { - private final Weight innerWeight; - private final SecondPhaseDocAccepter accepter; - private final TerminationTracker terminationTracker; - - private GeoTwoPhaseWeight( - Query query, - Weight innerWeight, - SecondPhaseDocAccepter accepter, - TerminationTracker terminationTracker) { - super(query); - this.innerWeight = innerWeight; - this.accepter = accepter; - this.terminationTracker = terminationTracker; - } - - @Override - public void extractTerms(Set terms) { - innerWeight.extractTerms(terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return innerWeight.explain(context, doc); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - Scorer innerScorer = innerWeight.scorer(context); - if (innerScorer == null) { - return null; - } - if (ENABLE_GEO_EARLY_TERMINATION - && (terminationTracker == null || !terminationTracker.useLastSearchedDocIdOnTimeout())) { - innerScorer = new ConstantScoreScorer( - this, - 0.0f, - ScoreMode.COMPLETE_NO_SCORES, - new TimedDocIdSetIterator(innerScorer.iterator(), - terminationTracker, - GEO_TIMEOUT_OVERRIDE, - GEO_SEARCH_TIMEOUT_COUNT)); - } - - accepter.initialize(context); - return new GeoTwoPhaseScorer(this, innerScorer, accepter); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return innerWeight.isCacheable(ctx); - } - } - - private static final class GeoTwoPhaseScorer extends Scorer { - private final Scorer innerScorer; - private final SecondPhaseDocAccepter accepter; - - private GeoTwoPhaseScorer(Weight weight, Scorer innerScorer, SecondPhaseDocAccepter accepter) { - super(weight); - this.innerScorer = innerScorer; - this.accepter = accepter; - } - - @Override - public TwoPhaseIterator twoPhaseIterator() { - return new TwoPhaseIterator(innerScorer.iterator()) { - @Override - public boolean matches() throws IOException { - return checkDocExpensive(innerScorer.docID()); - } - - @Override - public float matchCost() { - return 0.0f; - } - }; - } - - @Override - public int docID() { - return iterator().docID(); - } - - @Override - public float score() throws IOException { - return innerScorer.score(); - } - - @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - private int doNext(int startingDocId) throws IOException { - int docId = startingDocId; - while ((docId != NO_MORE_DOCS) && !checkDocExpensive(docId)) { - docId = innerScorer.iterator().nextDoc(); - } - return docId; - } - - @Override - public int docID() { - return innerScorer.iterator().docID(); - } - - @Override - public int nextDoc() throws IOException { - return doNext(innerScorer.iterator().nextDoc()); - } - - @Override - public int advance(int target) throws IOException { - return doNext(innerScorer.iterator().advance(target)); - } - - @Override - public long cost() { - return 2 * innerScorer.iterator().cost(); - } - }; - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return innerScorer.getMaxScore(upTo); - } - - private boolean checkDocExpensive(int doc) throws IOException { - return accepter.accept(doc); - } - } - - public abstract static class SecondPhaseDocAccepter { - /** - * Initializes this accepter with the given reader context. - */ - public abstract void initialize(LeafReaderContext context) throws IOException; - - /** - * Determines if the given doc ID is accepted by this accepter. - */ - public abstract boolean accept(int doc) throws IOException; - - /** - * Returns a string description for this SecondPhaseDocAccepter instance. - */ - public abstract String toString(); - } - - public static final SecondPhaseDocAccepter ALL_DOCS_ACCEPTER = new SecondPhaseDocAccepter() { - @Override - public void initialize(LeafReaderContext context) { } - - @Override - public boolean accept(int doc) { - return true; - } - - @Override - public String toString() { - return "AllDocsAccepter"; - } - }; -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocIdSet.docx b/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocIdSet.docx new file mode 100644 index 000000000..12cdb0db8 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocIdSet.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocIdSet.java b/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocIdSet.java deleted file mode 100644 index 27c194678..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocIdSet.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.RamUsageEstimator; - -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; - -public final class MatchAllDocIdSet extends DocIdSet { - private final LeafReader reader; - - public MatchAllDocIdSet(LeafReader reader) { - this.reader = reader; - } - - @Override - public DocIdSetIterator iterator() throws IOException { - return new AllDocsIterator(reader); - } - - @Override - public Bits bits() throws IOException { - return new Bits() { - @Override - public boolean get(int index) { - return true; - } - - @Override - public int length() { - return reader.maxDoc(); - } - }; - } - - @Override - public long ramBytesUsed() { - return RamUsageEstimator.shallowSizeOf(this); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocsQuery.docx b/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocsQuery.docx new file mode 100644 index 000000000..826513aeb Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocsQuery.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocsQuery.java b/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocsQuery.java deleted file mode 100644 index 5b2b649f5..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/MatchAllDocsQuery.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Set; - -import com.google.common.base.Preconditions; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; -import com.twitter.search.earlybird.index.EarlybirdSingleSegmentSearcher; - -/** - * A MatchAllDocsQuery implementation that does not assume that doc IDs are assigned sequentially. - * Instead, it wraps the EarlybirdIndexSegmentAtomicReader into a RangeFilterDISI, and uses - * this iterator to traverse only the valid doc IDs in this segment. - * - * Note that org.apache.lucene.index.MatchAllDocsQuery is final, so we cannot extend it. - */ -public class MatchAllDocsQuery extends Query { - private static class MatchAllDocsWeight extends Weight { - private final Weight luceneWeight; - - public MatchAllDocsWeight(Query query, Weight luceneWeight) { - super(query); - this.luceneWeight = luceneWeight; - } - - @Override - public void extractTerms(Set terms) { - luceneWeight.extractTerms(terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return luceneWeight.explain(context, doc); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - Preconditions.checkState(context.reader() instanceof EarlybirdIndexSegmentAtomicReader, - "Expected an EarlybirdIndexSegmentAtomicReader, but got a " - + context.reader().getClass().getName() + " instance."); - EarlybirdIndexSegmentAtomicReader reader = - (EarlybirdIndexSegmentAtomicReader) context.reader(); - return new ConstantScoreScorer( - this, 1.0f, ScoreMode.COMPLETE_NO_SCORES, new RangeFilterDISI(reader)); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return luceneWeight.isCacheable(ctx); - } - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - org.apache.lucene.search.MatchAllDocsQuery luceneMatchAllDocsQuery = - new org.apache.lucene.search.MatchAllDocsQuery(); - Weight luceneWeight = luceneMatchAllDocsQuery.createWeight(searcher, scoreMode, boost); - if (!(searcher instanceof EarlybirdSingleSegmentSearcher)) { - return luceneWeight; - } - return new MatchAllDocsWeight(this, luceneWeight); - } - - @Override - public int hashCode() { - return 0; - } - - @Override - public boolean equals(Object obj) { - return obj instanceof MatchAllDocsQuery; - } - - // Copied from org.apache.lucene.search.MatchAllDocsWeight - @Override - public String toString(String field) { - return "*:*"; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/RequiredStatusIDsFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/RequiredStatusIDsFilter.docx new file mode 100644 index 000000000..04ae50c09 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/RequiredStatusIDsFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/RequiredStatusIDsFilter.java b/src/java/com/twitter/search/earlybird/search/queries/RequiredStatusIDsFilter.java deleted file mode 100644 index e62de315f..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/RequiredStatusIDsFilter.java +++ /dev/null @@ -1,131 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - -import com.google.common.base.Preconditions; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.common.search.IntArrayDocIdSetIterator; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; -import com.twitter.search.earlybird.index.TweetIDMapper; - -public final class RequiredStatusIDsFilter extends Query { - private final Collection statusIDs; - - public static Query getRequiredStatusIDsQuery(Collection statusIDs) { - return new BooleanQuery.Builder() - .add(new RequiredStatusIDsFilter(statusIDs), BooleanClause.Occur.FILTER) - .build(); - } - - private RequiredStatusIDsFilter(Collection statusIDs) { - this.statusIDs = Preconditions.checkNotNull(statusIDs); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - LeafReader leafReader = context.reader(); - if (!(leafReader instanceof EarlybirdIndexSegmentAtomicReader)) { - return DocIdSetIterator.empty(); - } - - EarlybirdIndexSegmentAtomicReader reader = (EarlybirdIndexSegmentAtomicReader) leafReader; - TweetIDMapper idMapper = (TweetIDMapper) reader.getSegmentData().getDocIDToTweetIDMapper(); - - int docIdsSize = 0; - int[] docIds = new int[statusIDs.size()]; - for (long statusID : statusIDs) { - int docId = idMapper.getDocID(statusID); - if (docId >= 0) { - docIds[docIdsSize++] = docId; - } - } - - Arrays.sort(docIds, 0, docIdsSize); - DocIdSetIterator statusesDISI = - new IntArrayDocIdSetIterator(Arrays.copyOf(docIds, docIdsSize)); - DocIdSetIterator allDocsDISI = new AllDocsIterator(reader); - - // We only want to return IDs for fully indexed documents. So we need to make sure that - // every doc ID we return exists in allDocsDISI. However, allDocsDISI has all documents in - // this segment, so driving by allDocsDISI would be very slow. So we want to drive by - // statusesDISI, and use allDocsDISI as a post-filter. What this comes down to is that we do - // not want to call allDocsDISI.nextDoc(); we only want to call allDocsDISI.advance(), and - // only on the doc IDs returned by statusesDISI. - return new DocIdSetIterator() { - @Override - public int docID() { - return statusesDISI.docID(); - } - - @Override - public int nextDoc() throws IOException { - statusesDISI.nextDoc(); - return advanceToNextFullyIndexedDoc(); - } - - @Override - public int advance(int target) throws IOException { - statusesDISI.advance(target); - return advanceToNextFullyIndexedDoc(); - } - - private int advanceToNextFullyIndexedDoc() throws IOException { - while (docID() != DocIdSetIterator.NO_MORE_DOCS) { - // Check if the current doc is fully indexed. - // If it is, then we can return it. If it's not, then we need to keep searching. - int allDocsDocId = allDocsDISI.advance(docID()); - if (allDocsDocId == docID()) { - break; - } - - statusesDISI.advance(allDocsDocId); - } - return docID(); - } - - @Override - public long cost() { - return statusesDISI.cost(); - } - }; - } - }; - } - - @Override - public int hashCode() { - return statusIDs.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof RequiredStatusIDsFilter)) { - return false; - } - - RequiredStatusIDsFilter filter = RequiredStatusIDsFilter.class.cast(obj); - return statusIDs.equals(filter.statusIDs); - } - - @Override - public final String toString(String field) { - return String.format("RequiredStatusIDs[%s]", statusIDs); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/SimpleTermQuery.docx b/src/java/com/twitter/search/earlybird/search/queries/SimpleTermQuery.docx new file mode 100644 index 000000000..ea0e4bc04 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/SimpleTermQuery.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/SimpleTermQuery.java b/src/java/com/twitter/search/earlybird/search/queries/SimpleTermQuery.java deleted file mode 100644 index 9981ef2ab..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/SimpleTermQuery.java +++ /dev/null @@ -1,86 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -/** - * A version of a term query that we can use when we already know the term id (in case where we - * previously looked it up), and have a TermsEnum to get the actual postings. - * - * This is can be used for constant score queries, where only iterating on the postings is required. - */ -class SimpleTermQuery extends Query { - private final TermsEnum termsEnum; - private final long termId; - - public SimpleTermQuery(TermsEnum termsEnum, long termId) { - this.termsEnum = termsEnum; - this.termId = termId; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - return new SimpleTermQueryWeight(scoreMode); - } - - @Override - public int hashCode() { - return (termsEnum == null ? 0 : termsEnum.hashCode()) * 13 + (int) termId; - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof SimpleTermQuery)) { - return false; - } - - SimpleTermQuery query = SimpleTermQuery.class.cast(obj); - return (termsEnum == null ? query.termsEnum == null : termsEnum.equals(query.termsEnum)) - && (termId == query.termId); - } - - @Override - public String toString(String field) { - return "SimpleTermQuery(" + field + ":" + termId + ")"; - } - - private class SimpleTermQueryWeight extends ConstantScoreWeight { - private final ScoreMode scoreMode; - - public SimpleTermQueryWeight(ScoreMode scoreMode) { - super(SimpleTermQuery.this, 1.0f); - this.scoreMode = scoreMode; - } - - @Override - public String toString() { - return "weight(" + SimpleTermQuery.this + ")"; - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - termsEnum.seekExact(termId); - - PostingsEnum docs = termsEnum.postings( - null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); - assert docs != null; - return new ConstantScoreScorer(this, 0, scoreMode, docs); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return true; - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/SinceMaxIDFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/SinceMaxIDFilter.docx new file mode 100644 index 000000000..b79f40a08 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/SinceMaxIDFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/SinceMaxIDFilter.java b/src/java/com/twitter/search/earlybird/search/queries/SinceMaxIDFilter.java deleted file mode 100644 index aae8fcf2f..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/SinceMaxIDFilter.java +++ /dev/null @@ -1,211 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.core.earlybird.index.DocIDToTweetIDMapper; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; -import com.twitter.search.earlybird.index.TweetIDMapper; - -/** - * Filters tweet ids according to since_id and max_id parameter. - * - * Note that since_id is exclusive and max_id is inclusive. - */ -public final class SinceMaxIDFilter extends Query { - public static final long NO_FILTER = -1; - - private final long sinceIdExclusive; - private final long maxIdInclusive; - - public static Query getSinceMaxIDQuery(long sinceIdExclusive, long maxIdInclusive) { - return new BooleanQuery.Builder() - .add(new SinceMaxIDFilter(sinceIdExclusive, maxIdInclusive), BooleanClause.Occur.FILTER) - .build(); - } - - public static Query getSinceIDQuery(long sinceIdExclusive) { - return new BooleanQuery.Builder() - .add(new SinceMaxIDFilter(sinceIdExclusive, NO_FILTER), BooleanClause.Occur.FILTER) - .build(); - } - - public static Query getMaxIDQuery(long maxIdInclusive) { - return new BooleanQuery.Builder() - .add(new SinceMaxIDFilter(NO_FILTER, maxIdInclusive), BooleanClause.Occur.FILTER) - .build(); - } - - private SinceMaxIDFilter(long sinceIdExclusive, long maxIdInclusive) { - this.sinceIdExclusive = sinceIdExclusive; - this.maxIdInclusive = maxIdInclusive; - } - - @Override - public int hashCode() { - return (int) (sinceIdExclusive * 13 + maxIdInclusive); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof SinceMaxIDFilter)) { - return false; - } - - SinceMaxIDFilter filter = SinceMaxIDFilter.class.cast(obj); - return (sinceIdExclusive == filter.sinceIdExclusive) - && (maxIdInclusive == filter.maxIdInclusive); - } - - @Override - public String toString(String field) { - if (sinceIdExclusive != NO_FILTER && maxIdInclusive != NO_FILTER) { - return "SinceIdFilter:" + sinceIdExclusive + ",MaxIdFilter:" + maxIdInclusive; - } else if (maxIdInclusive != NO_FILTER) { - return "MaxIdFilter:" + maxIdInclusive; - } else { - return "SinceIdFilter:" + sinceIdExclusive; - } - } - - /** - * Determines if this segment is at least partially covered by the given tweet ID range. - */ - public static boolean sinceMaxIDsInRange( - TweetIDMapper tweetIdMapper, long sinceIdExclusive, long maxIdInclusive) { - // Check for since id out of range. Note that since this ID is exclusive, - // equality is out of range too. - if (sinceIdExclusive != NO_FILTER && sinceIdExclusive >= tweetIdMapper.getMaxTweetID()) { - return false; - } - - // Check for max id in range. - return maxIdInclusive == NO_FILTER || maxIdInclusive >= tweetIdMapper.getMinTweetID(); - } - - // Returns true if this segment is completely covered by these id filters. - private static boolean sinceMaxIdsCoverRange( - TweetIDMapper tweetIdMapper, long sinceIdExclusive, long maxIdInclusive) { - // Check for since_id specified AND since_id newer than than first tweet. - if (sinceIdExclusive != NO_FILTER && sinceIdExclusive >= tweetIdMapper.getMinTweetID()) { - return false; - } - - // Check for max id in range. - return maxIdInclusive == NO_FILTER || maxIdInclusive > tweetIdMapper.getMaxTweetID(); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - if (!(reader instanceof EarlybirdIndexSegmentAtomicReader)) { - return new AllDocsIterator(reader); - } - - EarlybirdIndexSegmentAtomicReader twitterInMemoryIndexReader = - (EarlybirdIndexSegmentAtomicReader) reader; - TweetIDMapper tweetIdMapper = - (TweetIDMapper) twitterInMemoryIndexReader.getSegmentData().getDocIDToTweetIDMapper(); - - // Important to return a null DocIdSetIterator here, so the Scorer will skip searching - // this segment completely. - if (!sinceMaxIDsInRange(tweetIdMapper, sinceIdExclusive, maxIdInclusive)) { - return null; - } - - // Optimization: just return a match-all iterator when the whole segment is in range. - // This avoids having to do so many status id lookups. - if (sinceMaxIdsCoverRange(tweetIdMapper, sinceIdExclusive, maxIdInclusive)) { - return new AllDocsIterator(reader); - } - - return new SinceMaxIDDocIdSetIterator( - twitterInMemoryIndexReader, sinceIdExclusive, maxIdInclusive); - } - }; - } - - @VisibleForTesting - static class SinceMaxIDDocIdSetIterator extends RangeFilterDISI { - private final DocIDToTweetIDMapper docIdToTweetIdMapper; - private final long sinceIdExclusive; - private final long maxIdInclusive; - - public SinceMaxIDDocIdSetIterator(EarlybirdIndexSegmentAtomicReader reader, - long sinceIdExclusive, - long maxIdInclusive) throws IOException { - super(reader, - findMaxIdDocID(reader, maxIdInclusive), - findSinceIdDocID(reader, sinceIdExclusive)); - this.docIdToTweetIdMapper = reader.getSegmentData().getDocIDToTweetIDMapper(); - this.sinceIdExclusive = sinceIdExclusive; // sinceStatusId == NO_FILTER is OK, it's exclusive - this.maxIdInclusive = maxIdInclusive != NO_FILTER ? maxIdInclusive : Long.MAX_VALUE; - } - - /** - * This is a necessary check when we have out of order tweets in the archive. - * When tweets are out of order, this guarantees that no false positive results are returned. - * I.e. we can still miss some tweets in the specified range, but we never incorrectly return - * anything that's not in the range. - */ - @Override - protected boolean shouldReturnDoc() { - final long statusID = docIdToTweetIdMapper.getTweetID(docID()); - return statusID > sinceIdExclusive && statusID <= maxIdInclusive; - } - - private static int findSinceIdDocID( - EarlybirdIndexSegmentAtomicReader reader, long sinceIdExclusive) throws IOException { - TweetIDMapper tweetIdMapper = - (TweetIDMapper) reader.getSegmentData().getDocIDToTweetIDMapper(); - if (sinceIdExclusive != SinceMaxIDFilter.NO_FILTER) { - // We use this as an upper bound on the search, so we want to find the highest possible - // doc ID for this tweet ID. - boolean findMaxDocID = true; - return tweetIdMapper.findDocIdBound( - sinceIdExclusive, - findMaxDocID, - reader.getSmallestDocID(), - reader.maxDoc() - 1); - } else { - return DocIDToTweetIDMapper.ID_NOT_FOUND; - } - } - - private static int findMaxIdDocID( - EarlybirdIndexSegmentAtomicReader reader, long maxIdInclusive) throws IOException { - TweetIDMapper tweetIdMapper = - (TweetIDMapper) reader.getSegmentData().getDocIDToTweetIDMapper(); - if (maxIdInclusive != SinceMaxIDFilter.NO_FILTER) { - // We use this as a lower bound on the search, so we want to find the lowest possible - // doc ID for this tweet ID. - boolean findMaxDocID = false; - return tweetIdMapper.findDocIdBound( - maxIdInclusive, - findMaxDocID, - reader.getSmallestDocID(), - reader.maxDoc() - 1); - } else { - return DocIDToTweetIDMapper.ID_NOT_FOUND; - } - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/SinceUntilFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/SinceUntilFilter.docx new file mode 100644 index 000000000..e7e0c0b2e Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/SinceUntilFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/SinceUntilFilter.java b/src/java/com/twitter/search/earlybird/search/queries/SinceUntilFilter.java deleted file mode 100644 index 1f68975c4..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/SinceUntilFilter.java +++ /dev/null @@ -1,137 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.TimeMapper; -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; - -// Filters tweets according to since time and until time (in seconds). -// Note that since time is inclusive, and until time is exclusive. -public final class SinceUntilFilter extends Query { - public static final int NO_FILTER = -1; - - // These are both in seconds since the epoch. - private final int minTimeInclusive; - private final int maxTimeExclusive; - - public static Query getSinceQuery(int sinceTimeSeconds) { - return new BooleanQuery.Builder() - .add(new SinceUntilFilter(sinceTimeSeconds, NO_FILTER), BooleanClause.Occur.FILTER) - .build(); - } - - public static Query getUntilQuery(int untilTimeSeconds) { - return new BooleanQuery.Builder() - .add(new SinceUntilFilter(NO_FILTER, untilTimeSeconds), BooleanClause.Occur.FILTER) - .build(); - } - - public static Query getSinceUntilQuery(int sinceTimeSeconds, int untilTimeSeconds) { - return new BooleanQuery.Builder() - .add(new SinceUntilFilter(sinceTimeSeconds, untilTimeSeconds), BooleanClause.Occur.FILTER) - .build(); - } - - private SinceUntilFilter(int sinceTime, int untilTime) { - this.minTimeInclusive = sinceTime != NO_FILTER ? sinceTime : 0; - this.maxTimeExclusive = untilTime != NO_FILTER ? untilTime : Integer.MAX_VALUE; - } - - @Override - public int hashCode() { - return (int) (minTimeInclusive * 17 + maxTimeExclusive); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof SinceUntilFilter)) { - return false; - } - - SinceUntilFilter filter = SinceUntilFilter.class.cast(obj); - return (minTimeInclusive == filter.minTimeInclusive) - && (maxTimeExclusive == filter.maxTimeExclusive); - } - - @Override - public String toString(String field) { - if (minTimeInclusive > 0 && maxTimeExclusive != Integer.MAX_VALUE) { - return "SinceFilter:" + this.minTimeInclusive + ",UntilFilter:" + maxTimeExclusive; - } else if (minTimeInclusive > 0) { - return "SinceFilter:" + this.minTimeInclusive; - } else { - return "UntilFilter:" + this.maxTimeExclusive; - } - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - LeafReader indexReader = context.reader(); - if (!(indexReader instanceof EarlybirdIndexSegmentAtomicReader)) { - return new AllDocsIterator(indexReader); - } - - EarlybirdIndexSegmentAtomicReader reader = (EarlybirdIndexSegmentAtomicReader) indexReader; - TimeMapper timeMapper = reader.getSegmentData().getTimeMapper(); - int smallestDocID = timeMapper.findFirstDocId(maxTimeExclusive, reader.getSmallestDocID()); - int largestDoc = timeMapper.findFirstDocId(minTimeInclusive, reader.getSmallestDocID()); - int smallestDoc = smallestDocID > 0 ? smallestDocID - 1 : 0; - return new SinceUntilDocIdSetIterator( - reader, - timeMapper, - smallestDoc, - largestDoc, - minTimeInclusive, - maxTimeExclusive); - } - }; - } - - // Returns true if this TimeMapper is at least partially covered by these time filters. - public static boolean sinceUntilTimesInRange( - TimeMapper timeMapper, int sinceTime, int untilTime) { - return (sinceTime == NO_FILTER || sinceTime <= timeMapper.getLastTime()) - && (untilTime == NO_FILTER || untilTime >= timeMapper.getFirstTime()); - } - - private static final class SinceUntilDocIdSetIterator extends RangeFilterDISI { - private final TimeMapper timeMapper; - private final int minTimeInclusive; - private final int maxTimeExclusive; - - public SinceUntilDocIdSetIterator(EarlybirdIndexSegmentAtomicReader reader, - TimeMapper timeMapper, - int smallestDocID, - int largestDocID, - int minTimeInclusive, - int maxExclusive) throws IOException { - super(reader, smallestDocID, largestDocID); - this.timeMapper = timeMapper; - this.minTimeInclusive = minTimeInclusive; - this.maxTimeExclusive = maxExclusive; - } - - @Override - protected boolean shouldReturnDoc() { - final int docTime = timeMapper.getTime(docID()); - return docTime >= minTimeInclusive && docTime < maxTimeExclusive; - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/TermQueryWithSafeToString.docx b/src/java/com/twitter/search/earlybird/search/queries/TermQueryWithSafeToString.docx new file mode 100644 index 000000000..64fa6bc76 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/TermQueryWithSafeToString.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/TermQueryWithSafeToString.java b/src/java/com/twitter/search/earlybird/search/queries/TermQueryWithSafeToString.java deleted file mode 100644 index 3ae4a0c15..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/TermQueryWithSafeToString.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import org.apache.lucene.index.Term; -import org.apache.lucene.search.TermQuery; - -/** - * Work around an issue where IntTerms and LongTerms are not valid utf8, - * so calling toString on any TermQuery containing an IntTerm or a LongTerm may cause exceptions. - * This code should produce the same output as TermQuery.toString - */ -public final class TermQueryWithSafeToString extends TermQuery { - private final String termValueForToString; - - public TermQueryWithSafeToString(Term term, String termValueForToString) { - super(term); - this.termValueForToString = termValueForToString; - } - - @Override - public String toString(String field) { - StringBuilder buffer = new StringBuilder(); - if (!getTerm().field().equals(field)) { - buffer.append(getTerm().field()); - buffer.append(":"); - } - buffer.append(termValueForToString); - return buffer.toString(); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/TimedDocIdSetIterator.docx b/src/java/com/twitter/search/earlybird/search/queries/TimedDocIdSetIterator.docx new file mode 100644 index 000000000..99332682e Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/TimedDocIdSetIterator.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/TimedDocIdSetIterator.java b/src/java/com/twitter/search/earlybird/search/queries/TimedDocIdSetIterator.java deleted file mode 100644 index e6d65868b..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/TimedDocIdSetIterator.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.lucene.search.DocIdSetIterator; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.search.EarlyTerminationState; -import com.twitter.search.common.search.TerminationTracker; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; - -/** - * DocIdSetIterator whose nextDoc() and advance() will early terminate by returning NO_MORE_DOCS - * after the given deadline. - */ -public class TimedDocIdSetIterator extends DocIdSetIterator { - // check deadline every NEXT_CALL_TIMEOUT_CHECK_PERIOD calls to nextDoc() - @VisibleForTesting - protected static final int NEXT_CALL_TIMEOUT_CHECK_PERIOD = - EarlybirdConfig.getInt("timed_doc_id_set_next_doc_deadline_check_period", 1000); - - - // check deadline every ADVANCE_CALL_TIMEOUT_CHECK_PERIOD calls to advance() - private static final int ADVANCE_CALL_TIMEOUT_CHECK_PERIOD = - EarlybirdConfig.getInt("timed_doc_id_set_advance_deadline_check_period", 100); - - private final Clock clock; - private final DocIdSetIterator innerIterator; - private final SearchCounter timeoutCountStat; - - @Nullable - private final TerminationTracker terminationTracker; - private final long deadlineMillisFromEpoch; - - private int docId = -1; - private int nextCounter = 0; - private int advanceCounter = 0; - - public TimedDocIdSetIterator(DocIdSetIterator innerIterator, - @Nullable TerminationTracker terminationTracker, - final long timeoutOverride, - @Nullable SearchCounter timeoutCountStat) { - this(innerIterator, terminationTracker, timeoutOverride, timeoutCountStat, Clock.SYSTEM_CLOCK); - } - - protected TimedDocIdSetIterator(DocIdSetIterator innerIterator, - @Nullable TerminationTracker terminationTracker, - final long timeoutOverride, - @Nullable SearchCounter timeoutCountStat, - Clock clock) { - this.clock = clock; - this.innerIterator = innerIterator; - this.timeoutCountStat = timeoutCountStat; - this.terminationTracker = terminationTracker; - - if (terminationTracker == null) { - deadlineMillisFromEpoch = -1; - } else { - if (timeoutOverride > 0) { - deadlineMillisFromEpoch = terminationTracker.getClientStartTimeMillis() + timeoutOverride; - } else { - deadlineMillisFromEpoch = terminationTracker.getTimeoutEndTimeWithReservation(); - } - } - } - - @VisibleForTesting - protected TimedDocIdSetIterator(DocIdSetIterator innerIterator, - final long deadline, - @Nullable SearchCounter timeoutCountStat, - Clock clock) { - this.clock = clock; - this.innerIterator = innerIterator; - this.timeoutCountStat = timeoutCountStat; - this.terminationTracker = null; - - this.deadlineMillisFromEpoch = deadline; - } - - - @Override - public int docID() { - return docId; - } - - @Override - public int nextDoc() throws IOException { - if (++nextCounter % NEXT_CALL_TIMEOUT_CHECK_PERIOD == 0 - && clock.nowMillis() > deadlineMillisFromEpoch) { - if (timeoutCountStat != null) { - timeoutCountStat.increment(); - } - if (terminationTracker != null) { - terminationTracker.setEarlyTerminationState( - EarlyTerminationState.TERMINATED_TIME_OUT_EXCEEDED); - } - - return docId = NO_MORE_DOCS; - } - return docId = innerIterator.nextDoc(); - } - - @Override - public int advance(int target) throws IOException { - if (++advanceCounter % ADVANCE_CALL_TIMEOUT_CHECK_PERIOD == 0 - && clock.nowMillis() > deadlineMillisFromEpoch) { - if (timeoutCountStat != null) { - timeoutCountStat.increment(); - } - if (terminationTracker != null) { - terminationTracker.setEarlyTerminationState( - EarlyTerminationState.TERMINATED_TIME_OUT_EXCEEDED); - } - return docId = NO_MORE_DOCS; - } - - return docId = innerIterator.advance(target); - } - - @Override - public long cost() { - return innerIterator.cost(); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/UserFlagsExcludeFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/UserFlagsExcludeFilter.docx new file mode 100644 index 000000000..2f600978d Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/UserFlagsExcludeFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/UserFlagsExcludeFilter.java b/src/java/com/twitter/search/earlybird/search/queries/UserFlagsExcludeFilter.java deleted file mode 100644 index a3d0890ff..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/UserFlagsExcludeFilter.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.index.util.AllDocsIterator; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; -import com.twitter.search.earlybird.common.userupdates.UserTable; - -public final class UserFlagsExcludeFilter extends Query { - /** - * Returns a query that filters hits based on their author flags. - * - * @param excludeAntisocial Determines if the filter should exclude hits from antisocial users. - * @param excludeOffensive Determines if the filter should exclude hits from offensive users. - * @param excludeProtected Determines if the filter should exclude hits from protected users - * @return A query that filters hits based on their author flags. - */ - public static Query getUserFlagsExcludeFilter(UserTable userTable, - boolean excludeAntisocial, - boolean excludeOffensive, - boolean excludeProtected) { - return new BooleanQuery.Builder() - .add(new UserFlagsExcludeFilter( - userTable, excludeAntisocial, excludeOffensive, excludeProtected), - BooleanClause.Occur.FILTER) - .build(); - } - - private final UserTable userTable; - private final boolean excludeAntisocial; - private final boolean excludeOffensive; - private final boolean excludeProtected; - - private UserFlagsExcludeFilter( - UserTable userTable, - boolean excludeAntisocial, - boolean excludeOffensive, - boolean excludeProtected) { - this.userTable = userTable; - this.excludeAntisocial = excludeAntisocial; - this.excludeOffensive = excludeOffensive; - this.excludeProtected = excludeProtected; - } - - @Override - public int hashCode() { - return (excludeAntisocial ? 13 : 0) + (excludeOffensive ? 1 : 0) + (excludeProtected ? 2 : 0); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof UserFlagsExcludeFilter)) { - return false; - } - - UserFlagsExcludeFilter filter = UserFlagsExcludeFilter.class.cast(obj); - return (excludeAntisocial == filter.excludeAntisocial) - && (excludeOffensive == filter.excludeOffensive) - && (excludeProtected == filter.excludeProtected); - } - - @Override - public String toString(String field) { - return "UserFlagsExcludeFilter"; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); - if (userTable == null) { - return new AllDocsIterator(reader); - } - - final int bits = - (excludeAntisocial ? UserTable.ANTISOCIAL_BIT : 0) - | (excludeOffensive ? UserTable.OFFENSIVE_BIT | UserTable.NSFW_BIT : 0) - | (excludeProtected ? UserTable.IS_PROTECTED_BIT : 0); - if (bits != 0) { - return new UserFlagsExcludeDocIdSetIterator(reader, userTable) { - @Override - protected boolean checkUserFlags(UserTable table, long userID) { - return !table.isSet(userID, bits); - } - }; - } - - return new AllDocsIterator(reader); - } - }; - } - - private abstract static class UserFlagsExcludeDocIdSetIterator extends RangeFilterDISI { - private final UserTable userTable; - private final NumericDocValues fromUserID; - - public UserFlagsExcludeDocIdSetIterator( - LeafReader indexReader, UserTable table) throws IOException { - super(indexReader); - userTable = table; - fromUserID = - indexReader.getNumericDocValues(EarlybirdFieldConstant.FROM_USER_ID_CSF.getFieldName()); - } - - @Override - protected boolean shouldReturnDoc() throws IOException { - return fromUserID.advanceExact(docID()) - && checkUserFlags(userTable, fromUserID.longValue()); - } - - protected abstract boolean checkUserFlags(UserTable table, long userID); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/UserIdMultiSegmentQuery.docx b/src/java/com/twitter/search/earlybird/search/queries/UserIdMultiSegmentQuery.docx new file mode 100644 index 000000000..4dd46ddb6 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/UserIdMultiSegmentQuery.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/UserIdMultiSegmentQuery.java b/src/java/com/twitter/search/earlybird/search/queries/UserIdMultiSegmentQuery.java deleted file mode 100644 index 891a21bd6..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/UserIdMultiSegmentQuery.java +++ /dev/null @@ -1,528 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BulkScorer; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.BytesRef; - -import com.twitter.decider.Decider; -import com.twitter.search.common.decider.DeciderUtil; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchTimer; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.common.query.HitAttributeHelper; -import com.twitter.search.common.query.IDDisjunctionQuery; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.base.IndexedNumericFieldSettings; -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.common.search.termination.QueryTimeout; -import com.twitter.search.common.util.analysis.LongTermAttributeImpl; -import com.twitter.search.common.util.analysis.SortableLongTermAttributeImpl; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentData; -import com.twitter.search.core.earlybird.index.inverted.InvertedIndex; -import com.twitter.search.core.earlybird.index.inverted.MultiSegmentTermDictionary; -import com.twitter.search.earlybird.partition.MultiSegmentTermDictionaryManager; -import com.twitter.search.earlybird.queryparser.EarlybirdQueryHelper; -import com.twitter.search.queryparser.query.QueryParserException; - -/** - * A variant of a multi-term ID disjunction query (similar to {@link UserIdMultiSegmentQuery}), - * that also uses a {@link MultiSegmentTermDictionary} where available, for more efficient - * term lookups for queries that span multiple segments. - * - * By default, a IDDisjunctionQuery (or Lucene's MultiTermQuery), does a term dictionary lookup - * for all of the terms in its disjunction, and it does it once for each segment (or AtomicReader) - * that the query is searching. - * This means that when the term dictionary is large, and the term lookups are expensive, and when - * we are searching multiple segments, the query needs to make num_terms * num_segments expensive - * term dictionary lookups. - * - * With the help of a MultiSegmentTermDictionary, this multi-term disjunction query implementation - * only does one lookup for all of the segments managed by the MultiSegmentTermDictionary. - * If a segment is not supported by the MultiSegmentTermDictionary (e.g. if it's not optimized yet), - * a regular lookup in that segment's term dictionary will be performed. - * - * Usually, we will make 'num_terms' lookups in the current, un-optimized segment, and then if - * more segments need to be searched, we will make another 'num_terms' lookups, once for all of - * the remaining segments. - * - * When performing lookups in the MultiSegmentTermDictionary, for each supported segment, we save - * a list of termIds from that segment for all the searched terms that appear in that segment. - * - * For example, when querying for UserIdMultiSegmentQuery with user ids: {1L, 2L, 3L} and - * segments: {1, 2}, where segment 1 has user ids {1L, 2L} indexed under termIds {100, 200}, - * and segment 2 has user ids {1L, 2L, 3L} indexed under termIds {200, 300, 400}, we will build - * up the following map once: - * segment1 -> [100, 200] - * segment2 -> [200, 300, 400] - */ -public class UserIdMultiSegmentQuery extends Query { - @VisibleForTesting - public static final SearchTimerStats TERM_LOOKUP_STATS = - SearchTimerStats.export("multi_segment_query_term_lookup", TimeUnit.NANOSECONDS, false); - public static final SearchTimerStats QUERY_FROM_PRECOMPUTED = - SearchTimerStats.export("multi_segment_query_from_precomputed", TimeUnit.NANOSECONDS, false); - public static final SearchTimerStats QUERY_REGULAR = - SearchTimerStats.export("multi_segment_query_regular", TimeUnit.NANOSECONDS, false); - - @VisibleForTesting - public static final SearchCounter USED_MULTI_SEGMENT_TERM_DICTIONARY_COUNT = SearchCounter.export( - "user_id_multi_segment_query_used_multi_segment_term_dictionary_count"); - @VisibleForTesting - public static final SearchCounter USED_ORIGINAL_TERM_DICTIONARY_COUNT = SearchCounter.export( - "user_id_multi_segment_query_used_original_term_dictionary_count"); - - private static final SearchCounter NEW_QUERY_COUNT = - SearchCounter.export("user_id_multi_segment_new_query_count"); - private static final SearchCounter OLD_QUERY_COUNT = - SearchCounter.export("user_id_multi_segment_old_query_count"); - - private static final HashMap QUERY_COUNT_BY_QUERY_NAME = new HashMap<>(); - private static final HashMap QUERY_COUNT_BY_FIELD_NAME = new HashMap<>(); - - private static final String DECIDER_KEY_PREFIX = "use_multi_segment_id_disjunction_queries_in_"; - - /** - * Returns a new user ID disjunction query. - * - * @param ids The user IDs. - * @param field The field storing the user IDs. - * @param schemaSnapshot A snapshot of earlybird's schema. - * @param multiSegmentTermDictionaryManager The manager for the term dictionaries that span - * multiple segments. - * @param decider The decider. - * @param earlybirdCluster The earlybird cluster. - * @param ranks The hit attribution ranks to be assigned to every user ID. - * @param hitAttributeHelper The helper that tracks hit attributions. - * @param queryTimeout The timeout to be enforced on this query. - * @return A new user ID disjunction query. - */ - public static Query createIdDisjunctionQuery( - String queryName, - List ids, - String field, - ImmutableSchemaInterface schemaSnapshot, - MultiSegmentTermDictionaryManager multiSegmentTermDictionaryManager, - Decider decider, - EarlybirdCluster earlybirdCluster, - List ranks, - @Nullable HitAttributeHelper hitAttributeHelper, - @Nullable QueryTimeout queryTimeout) throws QueryParserException { - QUERY_COUNT_BY_QUERY_NAME.computeIfAbsent(queryName, name -> - SearchCounter.export("multi_segment_query_name_" + name)).increment(); - QUERY_COUNT_BY_FIELD_NAME.computeIfAbsent(field, name -> - SearchCounter.export("multi_segment_query_count_for_field_" + name)).increment(); - - if (DeciderUtil.isAvailableForRandomRecipient(decider, getDeciderName(earlybirdCluster))) { - NEW_QUERY_COUNT.increment(); - MultiSegmentTermDictionary multiSegmentTermDictionary = - multiSegmentTermDictionaryManager.getMultiSegmentTermDictionary(field); - return new UserIdMultiSegmentQuery( - ids, - field, - schemaSnapshot, - multiSegmentTermDictionary, - ranks, - hitAttributeHelper, - queryTimeout); - } else { - OLD_QUERY_COUNT.increment(); - return new IDDisjunctionQuery(ids, field, schemaSnapshot); - } - } - - @VisibleForTesting - public static String getDeciderName(EarlybirdCluster earlybirdCluster) { - return DECIDER_KEY_PREFIX + earlybirdCluster.name().toLowerCase(); - } - - private final boolean useOrderPreservingEncoding; - private final HitAttributeHelper hitAttributeHelper; - private final QueryTimeout queryTimeout; - private final MultiSegmentTermDictionary multiSegmentTermDictionary; - private final Schema.FieldInfo fieldInfo; - private final String field; - private final List ids; - - private final List ranks; - // For each segment where we have a multi-segment term dictionary, this map will contain the - // termIds of all the terms that actually appear in that segment's index. - @Nullable - private Map> termIdsPerSegment; - - // A wrap class helps to associate termId with corresponding search operator rank if exist - private final class TermRankPair { - private final int termId; - private final int rank; - - TermRankPair(int termId, int rank) { - this.termId = termId; - this.rank = rank; - } - - public int getTermId() { - return termId; - } - - public int getRank() { - return rank; - } - } - - @VisibleForTesting - public UserIdMultiSegmentQuery( - List ids, - String field, - ImmutableSchemaInterface schemaSnapshot, - MultiSegmentTermDictionary termDictionary, - List ranks, - @Nullable HitAttributeHelper hitAttributeHelper, - @Nullable QueryTimeout queryTimeout) { - this.field = field; - this.ids = ids; - this.multiSegmentTermDictionary = termDictionary; - this.ranks = ranks; - this.hitAttributeHelper = hitAttributeHelper; - this.queryTimeout = queryTimeout; - - // check ids and ranks have same size - Preconditions.checkArgument(ranks.size() == 0 || ranks.size() == ids.size()); - // hitAttributeHelper is not null iff ranks is not empty - if (ranks.size() > 0) { - Preconditions.checkNotNull(hitAttributeHelper); - } else { - Preconditions.checkArgument(hitAttributeHelper == null); - } - - if (!schemaSnapshot.hasField(field)) { - throw new IllegalStateException("Tried to search a field which does not exist in schema"); - } - this.fieldInfo = Preconditions.checkNotNull(schemaSnapshot.getFieldInfo(field)); - - IndexedNumericFieldSettings numericFieldSettings = - fieldInfo.getFieldType().getNumericFieldSettings(); - if (numericFieldSettings == null) { - throw new IllegalStateException("Id field is not numerical"); - } - - this.useOrderPreservingEncoding = numericFieldSettings.isUseSortableEncoding(); - } - - /** - * If it hasn't been built yet, build up the map containing termIds of all the terms being - * searched, for all of the segments that are managed by the multi-segment term dictionary. - * - * We only do this once, when we have to search the first segment that's supported by our - * multi-segment term dictionary. - * - * Flow here is to: - * 1. go through all the ids being queried. - * 2. for each id, get the termIds for that term in all of the segments in the term dictionary - * 3. for all of the segments that have that term, add the termId to that segment's list of - * term ids (in the 'termIdsPerSegment' map). - */ - private void createTermIdsPerSegment() { - if (termIdsPerSegment != null) { - // already created the map - return; - } - - long start = System.nanoTime(); - - final BytesRef termRef = useOrderPreservingEncoding - ? SortableLongTermAttributeImpl.newBytesRef() - : LongTermAttributeImpl.newBytesRef(); - - termIdsPerSegment = Maps.newHashMap(); - List segmentIndexes = multiSegmentTermDictionary.getSegmentIndexes(); - - for (int idx = 0; idx < ids.size(); ++idx) { - long longTerm = ids.get(idx); - - if (useOrderPreservingEncoding) { - SortableLongTermAttributeImpl.copyLongToBytesRef(termRef, longTerm); - } else { - LongTermAttributeImpl.copyLongToBytesRef(termRef, longTerm); - } - - int[] termIds = multiSegmentTermDictionary.lookupTermIds(termRef); - Preconditions.checkState(segmentIndexes.size() == termIds.length, - "SegmentIndexes: %s, field: %s, termIds: %s", - segmentIndexes.size(), field, termIds.length); - - for (int indexId = 0; indexId < termIds.length; indexId++) { - int termId = termIds[indexId]; - if (termId != EarlybirdIndexSegmentAtomicReader.TERM_NOT_FOUND) { - InvertedIndex fieldIndex = segmentIndexes.get(indexId); - - List termIdsList = termIdsPerSegment.get(fieldIndex); - if (termIdsList == null) { - termIdsList = Lists.newArrayList(); - termIdsPerSegment.put(fieldIndex, termIdsList); - } - termIdsList.add(new TermRankPair( - termId, ranks.size() > 0 ? ranks.get(idx) : -1)); - } - } - } - - long elapsed = System.nanoTime() - start; - TERM_LOOKUP_STATS.timerIncrement(elapsed); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new UserIdMultiSegmentQueryWeight(searcher, scoreMode, boost); - } - - @Override - public int hashCode() { - return Arrays.hashCode( - new Object[] {useOrderPreservingEncoding, queryTimeout, field, ids, ranks}); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof UserIdMultiSegmentQuery)) { - return false; - } - - UserIdMultiSegmentQuery query = UserIdMultiSegmentQuery.class.cast(obj); - return Arrays.equals( - new Object[] {useOrderPreservingEncoding, queryTimeout, field, ids, ranks}, - new Object[] {query.useOrderPreservingEncoding, - query.queryTimeout, - query.field, - query.ids, - query.ranks}); - } - - @Override - public String toString(String fieldName) { - StringBuilder builder = new StringBuilder(); - builder.append(getClass().getSimpleName()).append("[").append(fieldName).append(":"); - for (Long id : this.ids) { - builder.append(id); - builder.append(","); - } - builder.setLength(builder.length() - 1); - builder.append("]"); - return builder.toString(); - } - - private final class UserIdMultiSegmentQueryWeight extends ConstantScoreWeight { - private final IndexSearcher searcher; - private final ScoreMode scoreMode; - - private UserIdMultiSegmentQueryWeight( - IndexSearcher searcher, - ScoreMode scoreMode, - float boost) { - super(UserIdMultiSegmentQuery.this, boost); - this.searcher = searcher; - this.scoreMode = scoreMode; - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - Weight weight = rewrite(context); - if (weight != null) { - return weight.scorer(context); - } else { - return null; - } - } - - @Override - public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { - Weight weight = rewrite(context); - if (weight != null) { - return weight.bulkScorer(context); - } else { - return null; - } - } - - @Override - public void extractTerms(Set terms) { - terms.addAll(ids - .stream() - .map(id -> new Term(field, LongTermAttributeImpl.copyIntoNewBytesRef(id))) - .collect(Collectors.toSet())); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return true; - } - - private Weight rewrite(LeafReaderContext context) throws IOException { - final Terms terms = context.reader().terms(field); - if (terms == null) { - // field does not exist - return null; - } - final TermsEnum termsEnum = terms.iterator(); - Preconditions.checkNotNull(termsEnum, "No termsEnum for field: %s", field); - - BooleanQuery bq; - // See if the segment is supported by the multi-segment term dictionary. If so, build up - // the query using the termIds from the multi-segment term dictionary. - // If not (for the current segment), do the term lookups directly in the queried segment. - InvertedIndex fieldIndex = getFieldIndexFromMultiTermDictionary(context); - if (fieldIndex != null) { - createTermIdsPerSegment(); - - USED_MULTI_SEGMENT_TERM_DICTIONARY_COUNT.increment(); - SearchTimer timer = QUERY_FROM_PRECOMPUTED.startNewTimer(); - bq = addPrecomputedTermQueries(fieldIndex, termsEnum); - QUERY_FROM_PRECOMPUTED.stopTimerAndIncrement(timer); - } else { - USED_ORIGINAL_TERM_DICTIONARY_COUNT.increment(); - // This segment is not supported by the multi-segment term dictionary. Lookup terms - // directly. - SearchTimer timer = QUERY_REGULAR.startNewTimer(); - bq = addTermQueries(termsEnum); - QUERY_REGULAR.stopTimerAndIncrement(timer); - } - - return searcher.rewrite(new ConstantScoreQuery(bq)).createWeight( - searcher, scoreMode, score()); - } - - /** - * If the multi-segment term dictionary supports this segment/LeafReader, then return the - * InvertedIndex representing this segment. - * - * If the segment being queried right now is not in the multi-segment term dictionary (e.g. - * if it's not optimized yet), return null. - */ - @Nullable - private InvertedIndex getFieldIndexFromMultiTermDictionary(LeafReaderContext context) - throws IOException { - if (multiSegmentTermDictionary == null) { - return null; - } - - if (context.reader() instanceof EarlybirdIndexSegmentAtomicReader) { - EarlybirdIndexSegmentAtomicReader reader = - (EarlybirdIndexSegmentAtomicReader) context.reader(); - - EarlybirdIndexSegmentData segmentData = reader.getSegmentData(); - InvertedIndex fieldIndex = segmentData.getFieldIndex(field); - - if (multiSegmentTermDictionary.supportSegmentIndex(fieldIndex)) { - return fieldIndex; - } - } - - return null; - } - - private BooleanQuery addPrecomputedTermQueries( - InvertedIndex fieldIndex, - TermsEnum termsEnum) throws IOException { - - BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - int numClauses = 0; - - List termRankPairs = termIdsPerSegment.get(fieldIndex); - if (termRankPairs != null) { - for (TermRankPair pair : termRankPairs) { - int termId = pair.getTermId(); - if (numClauses >= BooleanQuery.getMaxClauseCount()) { - BooleanQuery saved = bqBuilder.build(); - bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(saved, BooleanClause.Occur.SHOULD); - numClauses = 1; - } - - Query query; - if (pair.getRank() != -1) { - query = EarlybirdQueryHelper.maybeWrapWithHitAttributionCollector( - new SimpleTermQuery(termsEnum, termId), - pair.getRank(), - fieldInfo, - hitAttributeHelper); - } else { - query = new SimpleTermQuery(termsEnum, termId); - } - bqBuilder.add(EarlybirdQueryHelper.maybeWrapWithTimeout(query, queryTimeout), - BooleanClause.Occur.SHOULD); - ++numClauses; - } - } - return bqBuilder.build(); - } - - private BooleanQuery addTermQueries(TermsEnum termsEnum) throws IOException { - final BytesRef termRef = useOrderPreservingEncoding - ? SortableLongTermAttributeImpl.newBytesRef() - : LongTermAttributeImpl.newBytesRef(); - - BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - int numClauses = 0; - - for (int idx = 0; idx < ids.size(); ++idx) { - long longTerm = ids.get(idx); - if (useOrderPreservingEncoding) { - SortableLongTermAttributeImpl.copyLongToBytesRef(termRef, longTerm); - } else { - LongTermAttributeImpl.copyLongToBytesRef(termRef, longTerm); - } - - if (termsEnum.seekExact(termRef)) { - if (numClauses >= BooleanQuery.getMaxClauseCount()) { - BooleanQuery saved = bqBuilder.build(); - bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(saved, BooleanClause.Occur.SHOULD); - numClauses = 1; - } - - if (ranks.size() > 0) { - bqBuilder.add(EarlybirdQueryHelper.maybeWrapWithHitAttributionCollector( - new SimpleTermQuery(termsEnum, termsEnum.ord()), - ranks.get(idx), - fieldInfo, - hitAttributeHelper), - BooleanClause.Occur.SHOULD); - } else { - bqBuilder.add(new SimpleTermQuery(termsEnum, termsEnum.ord()), - BooleanClause.Occur.SHOULD); - } - ++numClauses; - } - } - - return bqBuilder.build(); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/queries/UserScrubGeoFilter.docx b/src/java/com/twitter/search/earlybird/search/queries/UserScrubGeoFilter.docx new file mode 100644 index 000000000..2dfdb716a Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/queries/UserScrubGeoFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/queries/UserScrubGeoFilter.java b/src/java/com/twitter/search/earlybird/search/queries/UserScrubGeoFilter.java deleted file mode 100644 index 6f66ff54d..000000000 --- a/src/java/com/twitter/search/earlybird/search/queries/UserScrubGeoFilter.java +++ /dev/null @@ -1,82 +0,0 @@ -package com.twitter.search.earlybird.search.queries; - -import java.io.IOException; -import java.util.Objects; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; - -import com.twitter.search.common.metrics.SearchRateCounter; -import com.twitter.search.common.query.FilteredQuery; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.earlybird.common.userupdates.UserScrubGeoMap; -import com.twitter.search.earlybird.index.TweetIDMapper; - -/** - * Filter that can be used with searches over geo field postings lists in order to filter out tweets - * that have been geo scrubbed. Determines if a tweet has been geo scrubbed by comparing the - * tweet's id against the max scrubbed tweet id for that tweet's author, which is stored in the - * UserScrubGeoMap. - * - * See: go/realtime-geo-filtering - */ -public class UserScrubGeoFilter implements FilteredQuery.DocIdFilterFactory { - - private UserScrubGeoMap userScrubGeoMap; - - private final SearchRateCounter totalRequestsUsingFilterCounter = - SearchRateCounter.export("user_scrub_geo_filter_total_requests"); - - public static FilteredQuery.DocIdFilterFactory getDocIdFilterFactory( - UserScrubGeoMap userScrubGeoMap) { - return new UserScrubGeoFilter(userScrubGeoMap); - } - - public UserScrubGeoFilter(UserScrubGeoMap userScrubGeoMap) { - this.userScrubGeoMap = userScrubGeoMap; - totalRequestsUsingFilterCounter.increment(); - } - - @Override - public FilteredQuery.DocIdFilter getDocIdFilter(LeafReaderContext context) throws IOException { - // To determine if a given doc has been geo scrubbed we need two pieces of information about the - // doc: the associated tweet id and the user id of the tweet's author. We can get the tweet id - // from the TweetIDMapper for the segment we are currently searching, and we can get the user id - // of the tweet's author by looking up the doc id in the NumericDocValues for the - // FROM_USER_ID_CSF. - // - // With this information we can check the UserScrubGeoMap to find out if the tweet has been - // geo scrubbed and filter it out accordingly. - final EarlybirdIndexSegmentAtomicReader currTwitterReader = - (EarlybirdIndexSegmentAtomicReader) context.reader(); - final TweetIDMapper tweetIdMapper = - (TweetIDMapper) currTwitterReader.getSegmentData().getDocIDToTweetIDMapper(); - final NumericDocValues fromUserIdDocValues = currTwitterReader.getNumericDocValues( - EarlybirdFieldConstant.FROM_USER_ID_CSF.getFieldName()); - return (docId) -> fromUserIdDocValues.advanceExact(docId) - && !userScrubGeoMap.isTweetGeoScrubbed( - tweetIdMapper.getTweetID(docId), fromUserIdDocValues.longValue()); - } - - @Override - public String toString() { - return "UserScrubGeoFilter"; - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof UserScrubGeoMap)) { - return false; - } - - UserScrubGeoFilter filter = UserScrubGeoFilter.class.cast(obj); - // filters are considered equal as long as they are using the same UserScrubGeoMap - return Objects.equals(userScrubGeoMap, filter.userScrubGeoMap); - } - - @Override - public int hashCode() { - return userScrubGeoMap == null ? 0 : userScrubGeoMap.hashCode(); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringData.docx b/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringData.docx new file mode 100644 index 000000000..659b1551c Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringData.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringData.java b/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringData.java deleted file mode 100644 index 9d0e85795..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringData.java +++ /dev/null @@ -1,422 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import java.util.Arrays; -import java.util.List; - -import com.google.common.collect.Lists; - -import com.twitter.search.common.constants.SearchCardType; -import com.twitter.search.common.constants.thriftjava.ThriftLanguage; - -public class LinearScoringData { - public static final float NO_BOOST_VALUE = 1.0f; - - // A signal value so we can tell if something is unset, also used in explanation. - public static final int UNSET_SIGNAL_VALUE = -999; - - //This is somewhat arbitrary, and is here so that we have some limit on - //how many offline experimental features we support per query - public static final int MAX_OFFLINE_EXPERIMENTAL_FIELDS = 5; - - public enum SkipReason { - NOT_SKIPPED, - ANTIGAMING, - LOW_REPUTATION, - LOW_TEXT_SCORE, - LOW_RETWEET_COUNT, - LOW_FAV_COUNT, - SOCIAL_FILTER, - LOW_FINAL_SCORE - } - - // When you add fields here, make sure you also update the clear() function. - public double luceneScore; - public double textScore; - //I am not sure why this has to be double... - public double tokenAt140DividedByNumTokensBucket; - public double userRep; - public double parusScore; - public final double[] offlineExpFeatureValues = new double[MAX_OFFLINE_EXPERIMENTAL_FIELDS]; - - // v1 engagement counters - public double retweetCountPostLog2; - public double favCountPostLog2; - public double replyCountPostLog2; - public double embedsImpressionCount; - public double embedsUrlCount; - public double videoViewCount; - - // v2 engagement counters (that have a v1 counter part) - public double retweetCountV2; - public double favCountV2; - public double replyCountV2; - public double embedsImpressionCountV2; - public double embedsUrlCountV2; - public double videoViewCountV2; - // pure v2 engagement counters, they started v2 only - public double quotedCount; - public double weightedRetweetCount; - public double weightedReplyCount; - public double weightedFavCount; - public double weightedQuoteCount; - - // card related properties - public boolean hasCard; - public byte cardType; - - public boolean hasUrl; - public boolean isReply; - public boolean isRetweet; - public boolean isOffensive; - public boolean hasTrend; - public boolean isFromVerifiedAccount; - public boolean isFromBlueVerifiedAccount; - public boolean isUserSpam; - public boolean isUserNSFW; - public boolean isUserBot; - public boolean isUserAntiSocial; - public boolean hasVisibleLink; - - public double luceneContrib; - public double reputationContrib; - public double textScoreContrib; - public double favContrib; - public double replyContrib; - public double multipleReplyContrib; - public double retweetContrib; - public double parusContrib; - public final double[] offlineExpFeatureContributions = - new double[MAX_OFFLINE_EXPERIMENTAL_FIELDS]; - public double embedsImpressionContrib; - public double embedsUrlContrib; - public double videoViewContrib; - public double quotedContrib; - - public double hasUrlContrib; - public double isReplyContrib; - public double isFollowRetweetContrib; - public double isTrustedRetweetContrib; - - // Value passed in the request (ThriftRankingParams.querySpecificScoreAdjustments) - public double querySpecificScore; - - // Value passed in the request (ThriftRankingParams.authorSpecificScoreAdjustments) - public double authorSpecificScore; - - public double normalizedLuceneScore; - - public int tweetLangId; - public double uiLangMult; - public double userLangMult; - public boolean hasDifferentLang; - public boolean hasEnglishTweetAndDifferentUILang; - public boolean hasEnglishUIAndDifferentTweetLang; - - public int tweetAgeInSeconds; - public double ageDecayMult; - - // Intermediate scores - public double scoreBeforeBoost; - public double scoreAfterBoost; - public double scoreFinal; - public double scoreReturned; - - public SkipReason skipReason; - - public boolean isTrusted; - public boolean isFollow; - public boolean spamUserDampApplied; - public boolean nsfwUserDampApplied; - public boolean botUserDampApplied; - public boolean trustedCircleBoostApplied; - public boolean directFollowBoostApplied; - public boolean outOfNetworkReplyPenaltyApplied; - public boolean hasMultipleHashtagsOrTrends; - - public boolean tweetHasTrendsBoostApplied; - public boolean tweetFromVerifiedAccountBoostApplied; - public boolean tweetFromBlueVerifiedAccountBoostApplied; - public boolean hasCardBoostApplied; - public boolean cardDomainMatchBoostApplied; - public boolean cardAuthorMatchBoostApplied; - public boolean cardTitleMatchBoostApplied; - public boolean cardDescriptionMatchBoostApplied; - - public List hitFields; - public boolean hasNoTextHitDemotionApplied; - public boolean hasUrlOnlyHitDemotionApplied; - public boolean hasNameOnlyHitDemotionApplied; - public boolean hasSeparateTextAndNameHitDemotionApplied; - public boolean hasSeparateTextAndUrlHitDemotionApplied; - - public long fromUserId; - // This is actually retweet status ID, or the ID of the original tweet being (natively) retweeted - public long sharedStatusId; - public long referenceAuthorId; // SEARCH-8564 - - public boolean isSelfTweet; - public boolean selfTweetBoostApplied; - public double selfTweetMult; - - public boolean hasImageUrl; - public boolean hasVideoUrl; - public boolean hasMedialUrlBoostApplied; - public boolean hasNewsUrl; - public boolean hasNewsUrlBoostApplied; - - public boolean hasConsumerVideo; - public boolean hasProVideo; - public boolean hasVine; - public boolean hasPeriscope; - public boolean hasNativeImage; - public boolean isNullcast; - public boolean hasQuote; - - public boolean isSensitiveContent; - public boolean hasMultipleMediaFlag; - public boolean profileIsEggFlag; - public boolean isUserNewFlag; - - public int numMentions; - public int numHashtags; - public int linkLanguage; - public int prevUserTweetEngagement; - - public boolean isComposerSourceCamera; - - // health model scores by HML - public double toxicityScore; // go/toxicity - public double pBlockScore; // go/pblock - public double pSpammyTweetScore; // go/pspammytweet - public double pReportedTweetScore; // go/preportedtweet - public double spammyTweetContentScore; // go/spammy-tweet-content - public double experimentalHealthModelScore1; - public double experimentalHealthModelScore2; - public double experimentalHealthModelScore3; - public double experimentalHealthModelScore4; - - public LinearScoringData() { - hitFields = Lists.newArrayList(); - clear(); - } - - // the following three counters were added later and they got denormalized in standard way, - // you can choose to apply scalding (for legacy LinearScoringFunction) or - // not apply (for returning in metadata and display in debug). - public double getEmbedsImpressionCount(boolean scaleForScoring) { - return scaleForScoring ? logWith0(embedsImpressionCount) : embedsImpressionCount; - } - public double getEmbedsUrlCount(boolean scaleForScoring) { - return scaleForScoring ? logWith0(embedsUrlCount) : embedsUrlCount; - } - public double getVideoViewCount(boolean scaleForScoring) { - return scaleForScoring ? logWith0(videoViewCount) : videoViewCount; - } - private static double logWith0(double value) { - return value > 0 ? Math.log(value) : 0.0; - } - - /** - * Returns a string description of all data stored in this instance. - */ - public String getPropertyExplanation() { - StringBuilder sb = new StringBuilder(); - sb.append(hasCard ? "CARD " + SearchCardType.cardTypeFromByteValue(cardType) : ""); - sb.append(hasUrl ? "URL " : ""); - sb.append(isReply ? "REPLY " : ""); - sb.append(isRetweet ? "RETWEET " : ""); - sb.append(isOffensive ? "OFFENSIVE " : ""); - sb.append(hasTrend ? "TREND " : ""); - sb.append(hasMultipleHashtagsOrTrends ? "HASHTAG/TREND+ " : ""); - sb.append(isFromVerifiedAccount ? "VERIFIED " : ""); - sb.append(isFromBlueVerifiedAccount ? "BLUE_VERIFIED " : ""); - sb.append(isUserSpam ? "SPAM " : ""); - sb.append(isUserNSFW ? "NSFW " : ""); - sb.append(isUserBot ? "BOT " : ""); - sb.append(isUserAntiSocial ? "ANTISOCIAL " : ""); - sb.append(isTrusted ? "TRUSTED " : ""); - sb.append(isFollow ? "FOLLOW " : ""); - sb.append(isSelfTweet ? "SELF " : ""); - sb.append(hasImageUrl ? "IMAGE " : ""); - sb.append(hasVideoUrl ? "VIDEO " : ""); - sb.append(hasNewsUrl ? "NEWS " : ""); - sb.append(isNullcast ? "NULLCAST" : ""); - sb.append(hasQuote ? "QUOTE" : ""); - sb.append(isComposerSourceCamera ? "Composer Source: CAMERA" : ""); - sb.append(favCountPostLog2 > 0 ? "Faves:" + favCountPostLog2 + " " : ""); - sb.append(retweetCountPostLog2 > 0 ? "Retweets:" + retweetCountPostLog2 + " " : ""); - sb.append(replyCountPostLog2 > 0 ? "Replies:" + replyCountPostLog2 + " " : ""); - sb.append(getEmbedsImpressionCount(false) > 0 - ? "Embedded Imps:" + getEmbedsImpressionCount(false) + " " : ""); - sb.append(getEmbedsUrlCount(false) > 0 - ? "Embedded Urls:" + getEmbedsUrlCount(false) + " " : ""); - sb.append(getVideoViewCount(false) > 0 - ? "Video views:" + getVideoViewCount(false) + " " : ""); - sb.append(weightedRetweetCount > 0 ? "Weighted Retweets:" - + ((int) weightedRetweetCount) + " " : ""); - sb.append(weightedReplyCount > 0 - ? "Weighted Replies:" + ((int) weightedReplyCount) + " " : ""); - sb.append(weightedFavCount > 0 - ? "Weighted Faves:" + ((int) weightedFavCount) + " " : ""); - sb.append(weightedQuoteCount > 0 - ? "Weighted Quotes:" + ((int) weightedQuoteCount) + " " : ""); - return sb.toString(); - } - - /** - * Resets all data stored in this instance. - */ - public void clear() { - luceneScore = UNSET_SIGNAL_VALUE; - textScore = UNSET_SIGNAL_VALUE; - tokenAt140DividedByNumTokensBucket = UNSET_SIGNAL_VALUE; - userRep = UNSET_SIGNAL_VALUE; - retweetCountPostLog2 = UNSET_SIGNAL_VALUE; - favCountPostLog2 = UNSET_SIGNAL_VALUE; - replyCountPostLog2 = UNSET_SIGNAL_VALUE; - parusScore = UNSET_SIGNAL_VALUE; - Arrays.fill(offlineExpFeatureValues, 0); - embedsImpressionCount = UNSET_SIGNAL_VALUE; - embedsUrlCount = UNSET_SIGNAL_VALUE; - videoViewCount = UNSET_SIGNAL_VALUE; - // v2 engagement, these each have a v1 counterpart - retweetCountV2 = UNSET_SIGNAL_VALUE; - favCountV2 = UNSET_SIGNAL_VALUE; - replyCountV2 = UNSET_SIGNAL_VALUE; - embedsImpressionCountV2 = UNSET_SIGNAL_VALUE; - embedsUrlCountV2 = UNSET_SIGNAL_VALUE; - videoViewCountV2 = UNSET_SIGNAL_VALUE; - // new engagement counters, they only have one version with the v2 normalizer - quotedCount = UNSET_SIGNAL_VALUE; - weightedRetweetCount = UNSET_SIGNAL_VALUE; - weightedReplyCount = UNSET_SIGNAL_VALUE; - weightedFavCount = UNSET_SIGNAL_VALUE; - weightedQuoteCount = UNSET_SIGNAL_VALUE; - - hasUrl = false; - isReply = false; - isRetweet = false; - isOffensive = false; - hasTrend = false; - isFromVerifiedAccount = false; - isFromBlueVerifiedAccount = false; - isUserSpam = false; - isUserNSFW = false; - isUserBot = false; - isUserAntiSocial = false; - hasVisibleLink = false; - isNullcast = false; - - luceneContrib = UNSET_SIGNAL_VALUE; - reputationContrib = UNSET_SIGNAL_VALUE; - textScoreContrib = UNSET_SIGNAL_VALUE; - replyContrib = UNSET_SIGNAL_VALUE; - multipleReplyContrib = UNSET_SIGNAL_VALUE; - retweetContrib = UNSET_SIGNAL_VALUE; - favContrib = UNSET_SIGNAL_VALUE; - parusContrib = UNSET_SIGNAL_VALUE; - Arrays.fill(offlineExpFeatureContributions, 0); - embedsImpressionContrib = UNSET_SIGNAL_VALUE; - embedsUrlContrib = UNSET_SIGNAL_VALUE; - videoViewContrib = UNSET_SIGNAL_VALUE; - hasUrlContrib = UNSET_SIGNAL_VALUE; - isReplyContrib = UNSET_SIGNAL_VALUE; - - querySpecificScore = UNSET_SIGNAL_VALUE; - authorSpecificScore = UNSET_SIGNAL_VALUE; - - normalizedLuceneScore = NO_BOOST_VALUE; - - tweetLangId = ThriftLanguage.UNKNOWN.getValue(); - uiLangMult = NO_BOOST_VALUE; - userLangMult = NO_BOOST_VALUE; - hasDifferentLang = false; - hasEnglishTweetAndDifferentUILang = false; - hasEnglishUIAndDifferentTweetLang = false; - - tweetAgeInSeconds = 0; - ageDecayMult = NO_BOOST_VALUE; - - // Intermediate scores - scoreBeforeBoost = UNSET_SIGNAL_VALUE; - scoreAfterBoost = UNSET_SIGNAL_VALUE; - scoreFinal = UNSET_SIGNAL_VALUE; - scoreReturned = UNSET_SIGNAL_VALUE; - - skipReason = SkipReason.NOT_SKIPPED; - - isTrusted = false; // Set later - isFollow = false; // Set later - trustedCircleBoostApplied = false; - directFollowBoostApplied = false; - outOfNetworkReplyPenaltyApplied = false; - hasMultipleHashtagsOrTrends = false; - spamUserDampApplied = false; - nsfwUserDampApplied = false; - botUserDampApplied = false; - - tweetHasTrendsBoostApplied = false; - tweetFromVerifiedAccountBoostApplied = false; - tweetFromBlueVerifiedAccountBoostApplied = false; - - fromUserId = UNSET_SIGNAL_VALUE; - sharedStatusId = UNSET_SIGNAL_VALUE; - referenceAuthorId = UNSET_SIGNAL_VALUE; - - isSelfTweet = false; - selfTweetBoostApplied = false; - selfTweetMult = NO_BOOST_VALUE; - - trustedCircleBoostApplied = false; - directFollowBoostApplied = false; - - hasImageUrl = false; - hasVideoUrl = false; - hasMedialUrlBoostApplied = false; - hasNewsUrl = false; - hasNewsUrlBoostApplied = false; - - hasCard = false; - cardType = SearchCardType.UNKNOWN.getByteValue(); - hasCardBoostApplied = false; - cardDomainMatchBoostApplied = false; - cardAuthorMatchBoostApplied = false; - cardTitleMatchBoostApplied = false; - cardDescriptionMatchBoostApplied = false; - - hitFields.clear(); - hasNoTextHitDemotionApplied = false; - hasUrlOnlyHitDemotionApplied = false; - hasNameOnlyHitDemotionApplied = false; - hasSeparateTextAndNameHitDemotionApplied = false; - hasSeparateTextAndUrlHitDemotionApplied = false; - - hasConsumerVideo = false; - hasProVideo = false; - hasVine = false; - hasPeriscope = false; - hasNativeImage = false; - - isSensitiveContent = false; - hasMultipleMediaFlag = false; - profileIsEggFlag = false; - numMentions = 0; - numHashtags = 0; - isUserNewFlag = false; - linkLanguage = 0; - prevUserTweetEngagement = 0; - - isComposerSourceCamera = false; - - // health model scores by HML - toxicityScore = UNSET_SIGNAL_VALUE; - pBlockScore = UNSET_SIGNAL_VALUE; - pSpammyTweetScore = UNSET_SIGNAL_VALUE; - pReportedTweetScore = UNSET_SIGNAL_VALUE; - spammyTweetContentScore = UNSET_SIGNAL_VALUE; - experimentalHealthModelScore1 = UNSET_SIGNAL_VALUE; - experimentalHealthModelScore2 = UNSET_SIGNAL_VALUE; - experimentalHealthModelScore3 = UNSET_SIGNAL_VALUE; - experimentalHealthModelScore4 = UNSET_SIGNAL_VALUE; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringParams.docx b/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringParams.docx new file mode 100644 index 000000000..6bfab1e4a Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringParams.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringParams.java b/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringParams.java deleted file mode 100644 index 9c049068c..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/LinearScoringParams.java +++ /dev/null @@ -1,304 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import java.util.Arrays; -import java.util.Map; - -import com.google.common.annotations.VisibleForTesting; - -import com.twitter.search.common.constants.SearchCardType; -import com.twitter.search.common.constants.thriftjava.ThriftLanguage; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.ranking.thriftjava.ThriftAgeDecayRankingParams; -import com.twitter.search.common.ranking.thriftjava.ThriftCardRankingParams; -import com.twitter.search.common.ranking.thriftjava.ThriftRankingParams; -import com.twitter.search.common.util.lang.ThriftLanguageUtil; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSocialFilterType; - -/* - * The class for all query specific parameters, including the parameters from the relevanceOptions and - * values that are extracted from the request itself. - */ -public class LinearScoringParams { - - public static final double DEFAULT_FEATURE_WEIGHT = 0; - public static final double DEFAULT_FEATURE_MIN_VAL = 0; - public static final double DEFAULT_NO_BOOST = 1.0; - @VisibleForTesting - static final SearchCounter NULL_USER_LANGS_KEY = - SearchCounter.export("linear_scoring_params_null_user_langs_key"); - - public final double luceneWeight; - public final double textScoreWeight; - public final double textScoreMinVal; - public final double retweetWeight; - public final double retweetMinVal; - public final double favWeight; - public final double favMinVal; - public final double replyWeight; - public final double multipleReplyWeight; - public final double multipleReplyMinVal; - public final double isReplyWeight; - public final double parusWeight; - public final double embedsImpressionWeight; - public final double embedsUrlWeight; - public final double videoViewWeight; - public final double quotedCountWeight; - - public final double[] rankingOfflineExpWeights = - new double[LinearScoringData.MAX_OFFLINE_EXPERIMENTAL_FIELDS]; - - public final boolean applyBoosts; - - // Storing ranking params for cards, avoid using maps for faster lookup - public final double[] hasCardBoosts = new double[SearchCardType.values().length]; - public final double[] cardDomainMatchBoosts = new double[SearchCardType.values().length]; - public final double[] cardAuthorMatchBoosts = new double[SearchCardType.values().length]; - public final double[] cardTitleMatchBoosts = new double[SearchCardType.values().length]; - public final double[] cardDescriptionMatchBoosts = new double[SearchCardType.values().length]; - - public final double urlWeight; - public final double reputationWeight; - public final double reputationMinVal; - public final double followRetweetWeight; - public final double trustedRetweetWeight; - - // Adjustments for specific tweets (tweetId -> score) - public final Map querySpecificScoreAdjustments; - - // Adjustments for tweets posted by specific authors (userId -> score) - public final Map authorSpecificScoreAdjustments; - - public final double offensiveDamping; - public final double spamUserDamping; - public final double nsfwUserDamping; - public final double botUserDamping; - public final double trustedCircleBoost; - public final double directFollowBoost; - public final double minScore; - - public final boolean applyFiltersAlways; - - public final boolean useLuceneScoreAsBoost; - public final double maxLuceneScoreBoost; - - public final double langEnglishTweetDemote; - public final double langEnglishUIDemote; - public final double langDefaultDemote; - public final boolean useUserLanguageInfo; - public final double unknownLanguageBoost; - - public final double outOfNetworkReplyPenalty; - - public final boolean useAgeDecay; - public final double ageDecayHalflife; - public final double ageDecayBase; - public final double ageDecaySlope; - - // hit attribute demotions - public final boolean enableHitDemotion; - public final double noTextHitDemotion; - public final double urlOnlyHitDemotion; - public final double nameOnlyHitDemotion; - public final double separateTextAndNameHitDemotion; - public final double separateTextAndUrlHitDemotion; - - // trends related params - public final double tweetHasTrendBoost; - public final double multipleHashtagsOrTrendsDamping; - - public final double tweetFromVerifiedAccountBoost; - - public final double tweetFromBlueVerifiedAccountBoost; - - public final ThriftSocialFilterType socialFilterType; - public final int uiLangId; - // Confidences of the understandability of different languages for this user. - public final double[] userLangs = new double[ThriftLanguage.values().length]; - - public final long searcherId; - public final double selfTweetBoost; - - public final double tweetHasMediaUrlBoost; - public final double tweetHasNewsUrlBoost; - - // whether we need meta-data for replies what the reply is to. - public final boolean getInReplyToStatusId; - - // Initialize from a ranking parameter - public LinearScoringParams(ThriftSearchQuery searchQuery, ThriftRankingParams params) { - // weights - luceneWeight = params.isSetLuceneScoreParams() - ? params.getLuceneScoreParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - textScoreWeight = params.isSetTextScoreParams() - ? params.getTextScoreParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - retweetWeight = params.isSetRetweetCountParams() - ? params.getRetweetCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - favWeight = params.isSetFavCountParams() - ? params.getFavCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - replyWeight = params.isSetReplyCountParams() - ? params.getReplyCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - multipleReplyWeight = params.isSetMultipleReplyCountParams() - ? params.getMultipleReplyCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - parusWeight = params.isSetParusScoreParams() - ? params.getParusScoreParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - for (int i = 0; i < LinearScoringData.MAX_OFFLINE_EXPERIMENTAL_FIELDS; i++) { - Byte featureTypeByte = (byte) i; - // default weight is 0, thus contribution for unset feature value will be 0. - rankingOfflineExpWeights[i] = params.getOfflineExperimentalFeatureRankingParamsSize() > 0 - && params.getOfflineExperimentalFeatureRankingParams().containsKey(featureTypeByte) - ? params.getOfflineExperimentalFeatureRankingParams().get(featureTypeByte).getWeight() - : DEFAULT_FEATURE_WEIGHT; - } - embedsImpressionWeight = params.isSetEmbedsImpressionCountParams() - ? params.getEmbedsImpressionCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - embedsUrlWeight = params.isSetEmbedsUrlCountParams() - ? params.getEmbedsUrlCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - videoViewWeight = params.isSetVideoViewCountParams() - ? params.getVideoViewCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - quotedCountWeight = params.isSetQuotedCountParams() - ? params.getQuotedCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - - applyBoosts = params.isApplyBoosts(); - - // configure card values - Arrays.fill(hasCardBoosts, DEFAULT_NO_BOOST); - Arrays.fill(cardAuthorMatchBoosts, DEFAULT_NO_BOOST); - Arrays.fill(cardDomainMatchBoosts, DEFAULT_NO_BOOST); - Arrays.fill(cardTitleMatchBoosts, DEFAULT_NO_BOOST); - Arrays.fill(cardDescriptionMatchBoosts, DEFAULT_NO_BOOST); - if (params.isSetCardRankingParams()) { - for (SearchCardType cardType : SearchCardType.values()) { - byte cardTypeIndex = cardType.getByteValue(); - ThriftCardRankingParams rankingParams = params.getCardRankingParams().get(cardTypeIndex); - if (rankingParams != null) { - hasCardBoosts[cardTypeIndex] = rankingParams.getHasCardBoost(); - cardAuthorMatchBoosts[cardTypeIndex] = rankingParams.getAuthorMatchBoost(); - cardDomainMatchBoosts[cardTypeIndex] = rankingParams.getDomainMatchBoost(); - cardTitleMatchBoosts[cardTypeIndex] = rankingParams.getTitleMatchBoost(); - cardDescriptionMatchBoosts[cardTypeIndex] = rankingParams.getDescriptionMatchBoost(); - } - } - } - - urlWeight = params.isSetUrlParams() - ? params.getUrlParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - reputationWeight = params.isSetReputationParams() - ? params.getReputationParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - isReplyWeight = params.isSetIsReplyParams() - ? params.getIsReplyParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - followRetweetWeight = params.isSetDirectFollowRetweetCountParams() - ? params.getDirectFollowRetweetCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - trustedRetweetWeight = params.isSetTrustedCircleRetweetCountParams() - ? params.getTrustedCircleRetweetCountParams().getWeight() : DEFAULT_FEATURE_WEIGHT; - - querySpecificScoreAdjustments = params.getQuerySpecificScoreAdjustments(); - authorSpecificScoreAdjustments = params.getAuthorSpecificScoreAdjustments(); - - // min/max filters - textScoreMinVal = params.isSetTextScoreParams() - ? params.getTextScoreParams().getMin() : DEFAULT_FEATURE_MIN_VAL; - reputationMinVal = params.isSetReputationParams() - ? params.getReputationParams().getMin() : DEFAULT_FEATURE_MIN_VAL; - multipleReplyMinVal = params.isSetMultipleReplyCountParams() - ? params.getMultipleReplyCountParams().getMin() : DEFAULT_FEATURE_MIN_VAL; - retweetMinVal = params.isSetRetweetCountParams() && params.getRetweetCountParams().isSetMin() - ? params.getRetweetCountParams().getMin() : DEFAULT_FEATURE_MIN_VAL; - favMinVal = params.isSetFavCountParams() && params.getFavCountParams().isSetMin() - ? params.getFavCountParams().getMin() : DEFAULT_FEATURE_MIN_VAL; - - // boosts - spamUserDamping = params.isSetSpamUserBoost() ? params.getSpamUserBoost() : 1.0; - nsfwUserDamping = params.isSetNsfwUserBoost() ? params.getNsfwUserBoost() : 1.0; - botUserDamping = params.isSetBotUserBoost() ? params.getBotUserBoost() : 1.0; - offensiveDamping = params.getOffensiveBoost(); - trustedCircleBoost = params.getInTrustedCircleBoost(); - directFollowBoost = params.getInDirectFollowBoost(); - - // language boosts - langEnglishTweetDemote = params.getLangEnglishTweetBoost(); - langEnglishUIDemote = params.getLangEnglishUIBoost(); - langDefaultDemote = params.getLangDefaultBoost(); - useUserLanguageInfo = params.isUseUserLanguageInfo(); - unknownLanguageBoost = params.getUnknownLanguageBoost(); - - // hit demotions - enableHitDemotion = params.isEnableHitDemotion(); - noTextHitDemotion = params.getNoTextHitDemotion(); - urlOnlyHitDemotion = params.getUrlOnlyHitDemotion(); - nameOnlyHitDemotion = params.getNameOnlyHitDemotion(); - separateTextAndNameHitDemotion = params.getSeparateTextAndNameHitDemotion(); - separateTextAndUrlHitDemotion = params.getSeparateTextAndUrlHitDemotion(); - - outOfNetworkReplyPenalty = params.getOutOfNetworkReplyPenalty(); - - if (params.isSetAgeDecayParams()) { - // new age decay settings - ThriftAgeDecayRankingParams ageDecayParams = params.getAgeDecayParams(); - ageDecaySlope = ageDecayParams.getSlope(); - ageDecayHalflife = ageDecayParams.getHalflife(); - ageDecayBase = ageDecayParams.getBase(); - useAgeDecay = true; - } else if (params.isSetDeprecatedAgeDecayBase() - && params.isSetDeprecatedAgeDecayHalflife() - && params.isSetDeprecatedAgeDecaySlope()) { - ageDecaySlope = params.getDeprecatedAgeDecaySlope(); - ageDecayHalflife = params.getDeprecatedAgeDecayHalflife(); - ageDecayBase = params.getDeprecatedAgeDecayBase(); - useAgeDecay = true; - } else { - ageDecaySlope = 0.0; - ageDecayHalflife = 0.0; - ageDecayBase = 0.0; - useAgeDecay = false; - } - - // trends - tweetHasTrendBoost = params.getTweetHasTrendBoost(); - multipleHashtagsOrTrendsDamping = params.getMultipleHashtagsOrTrendsBoost(); - - // verified accounts - tweetFromVerifiedAccountBoost = params.getTweetFromVerifiedAccountBoost(); - tweetFromBlueVerifiedAccountBoost = params.getTweetFromBlueVerifiedAccountBoost(); - - // score filter - minScore = params.getMinScore(); - - applyFiltersAlways = params.isApplyFiltersAlways(); - - useLuceneScoreAsBoost = params.isUseLuceneScoreAsBoost(); - maxLuceneScoreBoost = params.getMaxLuceneScoreBoost(); - - searcherId = searchQuery.isSetSearcherId() ? searchQuery.getSearcherId() : -1; - selfTweetBoost = params.getSelfTweetBoost(); - - socialFilterType = searchQuery.getSocialFilterType(); - - // the UI language and the confidences of the languages user can understand. - if (!searchQuery.isSetUiLang() || searchQuery.getUiLang().isEmpty()) { - uiLangId = ThriftLanguage.UNKNOWN.getValue(); - } else { - uiLangId = ThriftLanguageUtil.getThriftLanguageOf(searchQuery.getUiLang()).getValue(); - } - if (searchQuery.getUserLangsSize() > 0) { - for (Map.Entry lang : searchQuery.getUserLangs().entrySet()) { - ThriftLanguage thriftLanguage = lang.getKey(); - // SEARCH-13441 - if (thriftLanguage != null) { - userLangs[thriftLanguage.getValue()] = lang.getValue(); - } else { - NULL_USER_LANGS_KEY.increment(); - } - } - } - - // For now, we will use the same boost for both image, and video. - tweetHasMediaUrlBoost = params.getTweetHasImageUrlBoost(); - tweetHasNewsUrlBoost = params.getTweetHasNewsUrlBoost(); - - getInReplyToStatusId = - searchQuery.isSetResultMetadataOptions() - && searchQuery.getResultMetadataOptions().isSetGetInReplyToStatusId() - && searchQuery.getResultMetadataOptions().isGetInReplyToStatusId(); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/MinFeatureValueFilter.docx b/src/java/com/twitter/search/earlybird/search/relevance/MinFeatureValueFilter.docx new file mode 100644 index 000000000..17dd20837 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/MinFeatureValueFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/MinFeatureValueFilter.java b/src/java/com/twitter/search/earlybird/search/relevance/MinFeatureValueFilter.java deleted file mode 100644 index c3c3e3861..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/MinFeatureValueFilter.java +++ /dev/null @@ -1,163 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import java.io.IOException; -import java.util.Objects; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.encoding.features.ByteNormalizer; -import com.twitter.search.common.encoding.features.ClampByteNormalizer; -import com.twitter.search.common.encoding.features.SingleBytePositiveFloatNormalizer; -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.common.query.FilteredQuery; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; - -public final class MinFeatureValueFilter extends Query implements FilteredQuery.DocIdFilterFactory { - private final String featureName; - private final ByteNormalizer normalizer; - private final double minValue; - - /** - * Creates a query that filters out all hits that have a value smaller than the given threshold - * for the given feature. - * - * @param featureName The feature. - * @param minValue The threshold for the feature values. - * @return A query that filters out all hits that have a value smaller than the given threshold - * for the given feature. - */ - public static Query getMinFeatureValueFilter(String featureName, double minValue) { - return new BooleanQuery.Builder() - .add(new MinFeatureValueFilter(featureName, minValue), BooleanClause.Occur.FILTER) - .build(); - } - - public static FilteredQuery.DocIdFilterFactory getDocIdFilterFactory( - String featureName, double minValue) { - return new MinFeatureValueFilter(featureName, minValue); - } - - /** - * Returns the normalizer that should be used to normalize the values for the given feature. - * - * @param featureName The feature. - * @return The normalizer that should be used to normalize the values for the given feature. - */ - @VisibleForTesting - public static ByteNormalizer getMinFeatureValueNormalizer(String featureName) { - if (featureName.equals(EarlybirdFieldConstant.USER_REPUTATION.getFieldName())) { - return new ClampByteNormalizer(0, 100); - } - - if (featureName.equals(EarlybirdFieldConstant.FAVORITE_COUNT.getFieldName()) - || featureName.equals(EarlybirdFieldConstant.PARUS_SCORE.getFieldName()) - || featureName.equals(EarlybirdFieldConstant.REPLY_COUNT.getFieldName()) - || featureName.equals(EarlybirdFieldConstant.RETWEET_COUNT.getFieldName())) { - return new SingleBytePositiveFloatNormalizer(); - } - - throw new IllegalArgumentException("Unknown normalization method for field " + featureName); - } - - @Override - public int hashCode() { - // Probably doesn't make sense to include the schemaSnapshot and normalizer here. - return (int) ((featureName == null ? 0 : featureName.hashCode() * 7) + minValue); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof MinFeatureValueFilter)) { - return false; - } - - // Probably doesn't make sense to include the schemaSnapshot and normalizer here. - MinFeatureValueFilter filter = MinFeatureValueFilter.class.cast(obj); - return Objects.equals(featureName, filter.featureName) && (minValue == filter.minValue); - } - - @Override - public String toString(String field) { - return String.format("MinFeatureValueFilter(%s, %f)", featureName, minValue); - } - - private MinFeatureValueFilter(String featureName, double minValue) { - this.featureName = featureName; - this.normalizer = getMinFeatureValueNormalizer(featureName); - this.minValue = normalizer.normalize(minValue); - } - - @Override - public FilteredQuery.DocIdFilter getDocIdFilter(LeafReaderContext context) throws IOException { - final NumericDocValues featureDocValues = context.reader().getNumericDocValues(featureName); - return (docId) -> featureDocValues.advanceExact(docId) - && ((byte) featureDocValues.longValue() >= minValue); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - return new MinFeatureValueDocIdSetIterator( - context.reader(), featureName, minValue); - } - }; - } - - private static final class MinFeatureValueDocIdSetIterator extends RangeFilterDISI { - private final NumericDocValues featureDocValues; - private final double minValue; - - MinFeatureValueDocIdSetIterator(LeafReader indexReader, - String featureName, - double minValue) throws IOException { - super(indexReader); - this.featureDocValues = indexReader.getNumericDocValues(featureName); - this.minValue = minValue; - } - - @Override - public boolean shouldReturnDoc() throws IOException { - // We need this explicit casting to byte, because of how we encode and decode features in our - // encoded_tweet_features field. If a feature is an int (uses all 32 bits of the int), then - // encoding the feature and then decoding it preserves its original value. However, if the - // feature does not use the entire int (and especially if it uses bits somewhere in the middle - // of the int), then the feature value is assumed to be unsigned when it goes through this - // process of encoding and decoding. So a user rep of - // RelevanceSignalConstants.UNSET_REPUTATION_SENTINEL (-128) will be correctly encoded as the - // binary value 10000000, but will be treated as an unsigned value when decoded, and therefore - // the decoded value will be 128. - // - // In retrospect, this seems like a really poor design decision. It seems like it would be - // better if all feature values were considered to be signed, even if most features can never - // have negative values. Unfortunately, making this change is not easy, because some features - // store normalized values, so we would also need to change the range of allowed values - // produced by those normalizers, as well as all code that depends on those values. - // - // So for now, just cast this value to a byte, to get the proper negative value. - return featureDocValues.advanceExact(docID()) - && ((byte) featureDocValues.longValue() >= minValue); - } - } - - public double getMinValue() { - return minValue; - } - - public ByteNormalizer getNormalizer() { - return normalizer; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceHit.docx b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceHit.docx new file mode 100644 index 000000000..0c4a4cf46 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceHit.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceHit.java b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceHit.java deleted file mode 100644 index abf312d9f..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceHit.java +++ /dev/null @@ -1,104 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import java.util.Comparator; - -import javax.annotation.Nullable; - -import com.google.common.base.Preconditions; - -import com.twitter.common_internal.collections.RandomAccessPriorityQueue; -import com.twitter.search.common.relevance.features.TweetIntegerShingleSignature; -import com.twitter.search.earlybird.search.Hit; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; - -public class RelevanceHit extends Hit - implements RandomAccessPriorityQueue.SignatureProvider { - @Nullable - private TweetIntegerShingleSignature signature; - - public RelevanceHit() { - super(Long.MAX_VALUE, Long.MAX_VALUE); - } - - public RelevanceHit(long timeSliceID, long statusID, - TweetIntegerShingleSignature signature, - ThriftSearchResultMetadata metadata) { - super(timeSliceID, statusID); - update(timeSliceID, statusID, signature, metadata); - } - - /** - * Updates the data for this relevance hit. - * - * @param timeSliceID The timeslice ID of the segment that the segment came from. - * @param statusID The hit's tweet ID. - * @param tweetSignature The tweet signature generated for this hit. - * @param metadata The metadata associated with this hit. - */ - public void update(long timeSliceID, long statusID, TweetIntegerShingleSignature tweetSignature, - ThriftSearchResultMetadata metadata) { - this.statusID = statusID; - this.timeSliceID = timeSliceID; - this.metadata = Preconditions.checkNotNull(metadata); - this.signature = Preconditions.checkNotNull(tweetSignature); - } - - /** - * Returns the computed score for this hit. - */ - public float getScore() { - if (metadata != null) { - return (float) metadata.getScore(); - } else { - return ScoringFunction.SKIP_HIT; - } - } - - // We want the score as a double (and not cast to a float) for COMPARATOR_BY_SCORE and - // PQ_COMPARATOR_BY_SCORE so that the results returned from Earlybirds will be sorted based on the - // scores in the ThriftSearchResultMetadata objects (and will not lose precision by being cast to - // floats). Thus, the sorted order on Earlybirds and Earlybird Roots will be consistent. - private double getScoreDouble() { - if (metadata != null) { - return metadata.getScore(); - } else { - return (double) ScoringFunction.SKIP_HIT; - } - } - - @Override @Nullable - public TweetIntegerShingleSignature getSignature() { - return signature; - } - - @Override - public String toString() { - return "RelevanceHit[tweetID=" + statusID + ",timeSliceID=" + timeSliceID - + ",score=" + (metadata == null ? "null" : metadata.getScore()) - + ",signature=" + (signature == null ? "null" : signature) + "]"; - } - - public static final Comparator COMPARATOR_BY_SCORE = - (d1, d2) -> { - // if two docs have the same score, then the first one (most recent) wins - if (d1.getScore() == d2.getScore()) { - return Long.compare(d2.getStatusID(), d1.getStatusID()); - } - return Double.compare(d2.getScoreDouble(), d1.getScoreDouble()); - }; - - public static final Comparator PQ_COMPARATOR_BY_SCORE = - (d1, d2) -> { - // Reverse the order - return COMPARATOR_BY_SCORE.compare(d2, d1); - }; - - @Override - public void clear() { - timeSliceID = Long.MAX_VALUE; - statusID = Long.MAX_VALUE; - metadata = null; - signature = null; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchRequestInfo.docx b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchRequestInfo.docx new file mode 100644 index 000000000..4ae3ddc4f Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchRequestInfo.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchRequestInfo.java b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchRequestInfo.java deleted file mode 100644 index 0b99ab0da..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchRequestInfo.java +++ /dev/null @@ -1,66 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import com.google.common.base.Preconditions; - -import org.apache.lucene.search.Query; - -import com.twitter.search.common.search.TerminationTracker; -import com.twitter.search.earlybird.QualityFactor; -import com.twitter.search.earlybird.search.SearchRequestInfo; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchRelevanceOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; - -public class RelevanceSearchRequestInfo extends SearchRequestInfo { - private final ThriftSearchRelevanceOptions relevanceOptions; - - public RelevanceSearchRequestInfo( - ThriftSearchQuery searchQuery, Query query, - TerminationTracker terminationTracker, QualityFactor qualityFactor) { - super(addResultMetadataOptionsIfUnset(searchQuery), query, terminationTracker, qualityFactor); - this.relevanceOptions = searchQuery.getRelevanceOptions(); - } - - private static ThriftSearchQuery addResultMetadataOptionsIfUnset(ThriftSearchQuery searchQuery) { - if (!searchQuery.isSetResultMetadataOptions()) { - searchQuery.setResultMetadataOptions(new ThriftSearchResultMetadataOptions()); - } - return searchQuery; - } - - @Override - protected int calculateMaxHitsToProcess(ThriftSearchQuery thriftSearchQuery) { - ThriftSearchRelevanceOptions searchRelevanceOptions = thriftSearchQuery.getRelevanceOptions(); - - // Don't use the value from the ThriftSearchQuery object if one is provided in the - // relevance options - int requestedMaxHitsToProcess = searchRelevanceOptions.isSetMaxHitsToProcess() - ? searchRelevanceOptions.getMaxHitsToProcess() - : super.calculateMaxHitsToProcess(thriftSearchQuery); - - return qualityFactorMaxHitsToProcess(getNumResultsRequested(), requestedMaxHitsToProcess); - } - - public ThriftSearchRelevanceOptions getRelevanceOptions() { - return this.relevanceOptions; - } - - /** - * Reduces maxHitsToProcess based on quality factor. Never reduces it beyond - * numResults. - * @param numResults - * @param maxHitsToProcess - * @return Reduced maxHitsToProcess. - */ - public int qualityFactorMaxHitsToProcess(int numResults, int maxHitsToProcess) { - Preconditions.checkNotNull(qualityFactor); - - // Do not quality factor if there is no lower bound on maxHitsToProcess. - if (numResults > maxHitsToProcess) { - return maxHitsToProcess; - } - - double currentQualityFactor = qualityFactor.get(); - return Math.max(numResults, (int) (currentQualityFactor * maxHitsToProcess)); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchResults.docx b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchResults.docx new file mode 100644 index 000000000..605db9e29 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchResults.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchResults.java b/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchResults.java deleted file mode 100644 index 0dc169dc9..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/RelevanceSearchResults.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import com.twitter.search.earlybird.search.Hit; -import com.twitter.search.earlybird.search.SimpleSearchResults; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -public class RelevanceSearchResults extends SimpleSearchResults { - public final ThriftSearchResultMetadata[] resultMetadata; - private ThriftSearchResultsRelevanceStats relevanceStats = null; - private long scoringTimeNanos = 0; - - public RelevanceSearchResults(int size) { - super(size); - this.resultMetadata = new ThriftSearchResultMetadata[size]; - } - - public void setHit(Hit hit, int hitIndex) { - hits[hitIndex] = hit; - resultMetadata[hitIndex] = hit.getMetadata(); - } - - public void setRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats) { - this.relevanceStats = relevanceStats; - } - public ThriftSearchResultsRelevanceStats getRelevanceStats() { - return relevanceStats; - } - - public void setScoringTimeNanos(long scoringTimeNanos) { - this.scoringTimeNanos = scoringTimeNanos; - } - - public long getScoringTimeNanos() { - return scoringTimeNanos; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/ScoreFilterQuery.docx b/src/java/com/twitter/search/earlybird/search/relevance/ScoreFilterQuery.docx new file mode 100644 index 000000000..6979bd258 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/ScoreFilterQuery.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/ScoreFilterQuery.java b/src/java/com/twitter/search/earlybird/search/relevance/ScoreFilterQuery.java deleted file mode 100644 index b3d6184d0..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/ScoreFilterQuery.java +++ /dev/null @@ -1,138 +0,0 @@ -package com.twitter.search.earlybird.search.relevance; - -import java.io.IOException; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; - -import com.twitter.search.common.query.DefaultFilterWeight; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.util.RangeFilterDISI; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunctionProvider; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunctionProvider.NamedScoringFunctionProvider; - -/** - * This filter only accepts documents for which the provided - * {@link com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction} - * returns a score that's greater or equal to the passed-in minScore and smaller or equal - * to maxScore. - */ -public final class ScoreFilterQuery extends Query { - private static final float DEFAULT_LUCENE_SCORE = 1.0F; - - private final float minScore; - private final float maxScore; - private final NamedScoringFunctionProvider scoringFunctionProvider; - private final ImmutableSchemaInterface schema; - - /** - * Returns a score filter. - * - * @param schema The schema to use to extract the feature scores. - * @param scoringFunctionProvider The scoring function provider. - * @param minScore The minimum score threshold. - * @param maxScore The maximum score threshold. - * @return A score filter with the given configuration. - */ - public static Query getScoreFilterQuery( - ImmutableSchemaInterface schema, - NamedScoringFunctionProvider scoringFunctionProvider, - float minScore, - float maxScore) { - return new BooleanQuery.Builder() - .add(new ScoreFilterQuery(schema, scoringFunctionProvider, minScore, maxScore), - BooleanClause.Occur.FILTER) - .build(); - } - - private ScoreFilterQuery(ImmutableSchemaInterface schema, - NamedScoringFunctionProvider scoringFunctionProvider, - float minScore, - float maxScore) { - this.schema = schema; - this.scoringFunctionProvider = scoringFunctionProvider; - this.minScore = minScore; - this.maxScore = maxScore; - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - return new DefaultFilterWeight(this) { - @Override - protected DocIdSetIterator getDocIdSetIterator(LeafReaderContext context) throws IOException { - ScoringFunction scoringFunction = scoringFunctionProvider.getScoringFunction(); - scoringFunction.setNextReader((EarlybirdIndexSegmentAtomicReader) context.reader()); - return new ScoreFilterDocIdSetIterator( - context.reader(), scoringFunction, minScore, maxScore); - } - }; - } - - private static final class ScoreFilterDocIdSetIterator extends RangeFilterDISI { - private final ScoringFunction scoringFunction; - private final float minScore; - private final float maxScore; - - public ScoreFilterDocIdSetIterator(LeafReader indexReader, ScoringFunction scoringFunction, - float minScore, float maxScore) throws IOException { - super(indexReader); - this.scoringFunction = scoringFunction; - this.minScore = minScore; - this.maxScore = maxScore; - } - - @Override - protected boolean shouldReturnDoc() throws IOException { - float score = scoringFunction.score(docID(), DEFAULT_LUCENE_SCORE); - return score >= minScore && score <= maxScore; - } - } - - public float getMinScoreForTest() { - return minScore; - } - - public float getMaxScoreForTest() { - return maxScore; - } - - public ScoringFunctionProvider getScoringFunctionProviderForTest() { - return scoringFunctionProvider; - } - - @Override - public int hashCode() { - return (int) (minScore * 29 - + maxScore * 17 - + (scoringFunctionProvider == null ? 0 : scoringFunctionProvider.hashCode())); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof ScoreFilterQuery)) { - return false; - } - - ScoreFilterQuery filter = ScoreFilterQuery.class.cast(obj); - return (minScore == filter.minScore) - && (maxScore == filter.maxScore) - && (scoringFunctionProvider == null - ? filter.scoringFunctionProvider == null - : scoringFunctionProvider.equals(filter.scoringFunctionProvider)); - } - - @Override - public String toString(String field) { - return "SCORE_FILTER_QUERY[minScore=" + minScore + ",maxScore=" + maxScore + "]"; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/AbstractRelevanceCollector.docx b/src/java/com/twitter/search/earlybird/search/relevance/collectors/AbstractRelevanceCollector.docx new file mode 100644 index 000000000..5e004f1a0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/collectors/AbstractRelevanceCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/AbstractRelevanceCollector.java b/src/java/com/twitter/search/earlybird/search/relevance/collectors/AbstractRelevanceCollector.java deleted file mode 100644 index 5eea104c9..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/collectors/AbstractRelevanceCollector.java +++ /dev/null @@ -1,147 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.collectors; - -import java.io.IOException; - -import com.google.common.base.Preconditions; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.facets.LanguageHistogram; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.search.AbstractResultsCollector; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchRequestInfo; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchResults; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction; -import com.twitter.search.earlybird.stats.EarlybirdSearcherStats; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -/** - * AbstractRelevanceCollector is a results collector that collects RelevanceHit results - * which include more detailed information than a normal Hit. - */ -public abstract class AbstractRelevanceCollector - extends AbstractResultsCollector { - protected final ScoringFunction scoringFunction; - private final ThriftSearchResultsRelevanceStats relevanceStats; - private final EarlybirdCluster cluster; - private final UserTable userTable; - - // Per-language result counts. - private final LanguageHistogram languageHistogram = new LanguageHistogram(); - - // Accumulated time spend on relevance scoring across all collected hits, including batch scoring. - private long scoringTimeNanos = 0; - - public AbstractRelevanceCollector( - ImmutableSchemaInterface schema, - RelevanceSearchRequestInfo searchRequestInfo, - ScoringFunction scoringFunction, - EarlybirdSearcherStats searcherStats, - EarlybirdCluster cluster, - UserTable userTable, - Clock clock, - int requestDebugMode) { - super(schema, searchRequestInfo, clock, searcherStats, requestDebugMode); - this.scoringFunction = scoringFunction; - this.relevanceStats = new ThriftSearchResultsRelevanceStats(); - this.cluster = cluster; - this.userTable = userTable; - } - - /** - * Subclasses must implement this method to actually collect a scored relevance hit. - */ - protected abstract void doCollectWithScore(long tweetID, float score) throws IOException; - - @Override - public final void startSegment() throws IOException { - scoringFunction.setNextReader(currTwitterReader); - - ThriftSearchResultMetadataOptions options = - searchRequestInfo.getSearchQuery().getResultMetadataOptions(); - featuresRequested = options != null && options.isReturnSearchResultFeatures(); - } - - @Override - protected final void doCollect(long tweetID) throws IOException { - final long scoringStartNanos = getClock().nowNanos(); - float luceneSore = scorer.score(); - final float score = scoringFunction.score(curDocId, luceneSore); - final long scoringEndNanos = getClock().nowNanos(); - addToOverallScoringTimeNanos(scoringStartNanos, scoringEndNanos); - - scoringFunction.updateRelevanceStats(relevanceStats); - - updateHitCounts(tweetID); - - doCollectWithScore(tweetID, score); - } - - protected final void addToOverallScoringTimeNanos(long scoringStartNanos, long scoringEndNanos) { - scoringTimeNanos += scoringEndNanos - scoringStartNanos; - } - - protected final ThriftSearchResultMetadata collectMetadata() throws IOException { - ThriftSearchResultMetadataOptions options = - searchRequestInfo.getSearchQuery().getResultMetadataOptions(); - Preconditions.checkNotNull(options); - ThriftSearchResultMetadata metadata = - Preconditions.checkNotNull(scoringFunction.getResultMetadata(options)); - if (metadata.isSetLanguage()) { - languageHistogram.increment(metadata.getLanguage().getValue()); - } - - // Some additional metadata which is not provided by the scoring function, but - // by accessing the reader directly. - if (currTwitterReader != null) { - fillResultGeoLocation(metadata); - if (searchRequestInfo.isCollectConversationId()) { - long conversationId = - documentFeatures.getFeatureValue(EarlybirdFieldConstant.CONVERSATION_ID_CSF); - if (conversationId != 0) { - ensureExtraMetadataIsSet(metadata); - metadata.getExtraMetadata().setConversationId(conversationId); - } - } - } - - // Check and collect hit attribution data, if it's available. - fillHitAttributionMetadata(metadata); - - long fromUserId = documentFeatures.getFeatureValue(EarlybirdFieldConstant.FROM_USER_ID_CSF); - if (searchRequestInfo.isGetFromUserId()) { - metadata.setFromUserId(fromUserId); - } - - collectExclusiveConversationAuthorId(metadata); - collectFacets(metadata); - collectFeatures(metadata); - collectIsProtected(metadata, cluster, userTable); - - return metadata; - } - - protected final ThriftSearchResultsRelevanceStats getRelevanceStats() { - return relevanceStats; - } - - public final LanguageHistogram getLanguageHistogram() { - return languageHistogram; - } - - @Override - protected final RelevanceSearchResults doGetResults() throws IOException { - final RelevanceSearchResults results = doGetRelevanceResults(); - results.setScoringTimeNanos(scoringTimeNanos); - return results; - } - - /** - * For subclasses to process and aggregate collected hits. - */ - protected abstract RelevanceSearchResults doGetRelevanceResults() throws IOException; -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/BatchRelevanceTopCollector.docx b/src/java/com/twitter/search/earlybird/search/relevance/collectors/BatchRelevanceTopCollector.docx new file mode 100644 index 000000000..78bb86aac Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/collectors/BatchRelevanceTopCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/BatchRelevanceTopCollector.java b/src/java/com/twitter/search/earlybird/search/relevance/collectors/BatchRelevanceTopCollector.java deleted file mode 100644 index 3e4f6a711..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/collectors/BatchRelevanceTopCollector.java +++ /dev/null @@ -1,118 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.collectors; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import com.twitter.common.collections.Pair; -import com.twitter.common.util.Clock; -import com.twitter.search.common.features.thrift.ThriftSearchResultFeatures; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.common.search.EarlyTerminationState; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchRequestInfo; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchResults; -import com.twitter.search.earlybird.search.relevance.scoring.BatchHit; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction; -import com.twitter.search.earlybird.stats.EarlybirdSearcherStats; -import com.twitter.search.earlybird.thrift.ThriftSearchRelevanceOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultExtraMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; - -/** - * BatchRelevanceTopCollector is similar to the `RelevanceTopCollector` in what it outputs: - * Collects the top numResults by score, filtering out duplicates - * and results with scores equal to Flat.MIN_VALUE. - * The way that it achieves that is different though: it will score documents through the batch score - * function instead of scoring documents one by one. - */ -public class BatchRelevanceTopCollector extends RelevanceTopCollector { - protected final List hits; - - public BatchRelevanceTopCollector( - ImmutableSchemaInterface schema, - RelevanceSearchRequestInfo searchRequestInfo, - ScoringFunction scoringFunction, - EarlybirdSearcherStats searcherStats, - EarlybirdCluster cluster, - UserTable userTable, - Clock clock, - int requestDebugMode) { - super(schema, searchRequestInfo, scoringFunction, searcherStats, cluster, userTable, clock, - requestDebugMode); - this.hits = new ArrayList<>((int) getMaxHitsToProcess()); - } - - @Override - protected void doCollectWithScore(long tweetID, float score) throws IOException { - Pair pair = - scoringFunction.collectFeatures(score); - ThriftSearchResultMetadata metadata = collectMetadata(); - hits.add(new BatchHit(pair.getFirst(), - pair.getSecond(), - metadata, - tweetID, - currTimeSliceID)); - } - - @Override - public EarlyTerminationState innerShouldCollectMore() { - if (hits.size() >= getMaxHitsToProcess()) { - return setEarlyTerminationState(EarlyTerminationState.TERMINATED_MAX_HITS_EXCEEDED); - } - return EarlyTerminationState.COLLECTING; - } - - @Override - protected RelevanceSearchResults doGetRelevanceResults() throws IOException { - final long scoringStartNanos = getClock().nowNanos(); - float[] scores = scoringFunction.batchScore(hits); - final long scoringEndNanos = getClock().nowNanos(); - addToOverallScoringTimeNanos(scoringStartNanos, scoringEndNanos); - exportBatchScoringTime(scoringEndNanos - scoringStartNanos); - - for (int i = 0; i < hits.size(); i++) { - BatchHit hit = hits.get(i); - ThriftSearchResultMetadata metadata = hit.getMetadata(); - - if (!metadata.isSetExtraMetadata()) { - metadata.setExtraMetadata(new ThriftSearchResultExtraMetadata()); - } - metadata.getExtraMetadata().setFeatures(hit.getFeatures()); - - - // Populate the ThriftSearchResultMetadata post batch scoring with information from the - // LinearScoringData, which now includes a score. - scoringFunction.populateResultMetadataBasedOnScoringData( - searchRequestInfo.getSearchQuery().getResultMetadataOptions(), - metadata, - hit.getScoringData()); - - collectWithScoreInternal( - hit.getTweetID(), - hit.getTimeSliceID(), - scores[i], - metadata - ); - } - return getRelevanceResultsInternal(); - } - - private void exportBatchScoringTime(long scoringTimeNanos) { - ThriftSearchRelevanceOptions relevanceOptions = searchRequestInfo.getRelevanceOptions(); - if (relevanceOptions.isSetRankingParams() - && relevanceOptions.getRankingParams().isSetSelectedTensorflowModel()) { - String model = relevanceOptions.getRankingParams().getSelectedTensorflowModel(); - SearchTimerStats batchScoringPerModelTimer = SearchTimerStats.export( - String.format("batch_scoring_time_for_model_%s", model), - TimeUnit.NANOSECONDS, - false, - true); - batchScoringPerModelTimer.timerIncrement(scoringTimeNanos); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceAllCollector.docx b/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceAllCollector.docx new file mode 100644 index 000000000..52ea26708 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceAllCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceAllCollector.java b/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceAllCollector.java deleted file mode 100644 index c7cd8d50f..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceAllCollector.java +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.collectors; - -import java.io.IOException; -import java.util.List; - -import com.google.common.collect.Lists; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.relevance.features.TweetIntegerShingleSignature; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.search.relevance.RelevanceHit; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchRequestInfo; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchResults; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction; -import com.twitter.search.earlybird.stats.EarlybirdSearcherStats; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; - -/** - * RelevanceAllCollector is a results collector that collects all results sorted by score, - * including signature-duplicates and results skipped by the scoring function. - */ -public class RelevanceAllCollector extends AbstractRelevanceCollector { - // All results. - protected final List results; - - public RelevanceAllCollector( - ImmutableSchemaInterface schema, - RelevanceSearchRequestInfo searchRequestInfo, - ScoringFunction scoringFunction, - EarlybirdSearcherStats searcherStats, - EarlybirdCluster cluster, - UserTable userTable, - Clock clock, - int requestDebugMode) { - super(schema, searchRequestInfo, scoringFunction, searcherStats, cluster, userTable, clock, - requestDebugMode); - this.results = Lists.newArrayList(); - } - - @Override - protected void doCollectWithScore(long tweetID, float score) throws IOException { - ThriftSearchResultMetadata metadata = collectMetadata(); - scoringFunction.populateResultMetadataBasedOnScoringData( - searchRequestInfo.getSearchQuery().getResultMetadataOptions(), - metadata, - scoringFunction.getScoringDataForCurrentDocument()); - results.add(new RelevanceHit( - currTimeSliceID, - tweetID, - TweetIntegerShingleSignature.deserialize(metadata.getSignature()), - metadata)); - } - - @Override - protected RelevanceSearchResults doGetRelevanceResults() { - final int numResults = results.size(); - RelevanceSearchResults searchResults = new RelevanceSearchResults(numResults); - - // Insert hits in decreasing order by score. - results.sort(RelevanceHit.COMPARATOR_BY_SCORE); - for (int i = 0; i < numResults; i++) { - searchResults.setHit(results.get(i), i); - } - searchResults.setRelevanceStats(getRelevanceStats()); - searchResults.setNumHits(numResults); - return searchResults; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceTopCollector.docx b/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceTopCollector.docx new file mode 100644 index 000000000..cefc35d38 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceTopCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceTopCollector.java b/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceTopCollector.java deleted file mode 100644 index ef921070c..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/collectors/RelevanceTopCollector.java +++ /dev/null @@ -1,167 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.collectors; - -import java.io.IOException; - -import com.google.common.base.Preconditions; - -import com.twitter.common.util.Clock; -import com.twitter.common_internal.collections.RandomAccessPriorityQueue; -import com.twitter.search.common.relevance.features.TweetIntegerShingleSignature; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.common.search.EarlyTerminationState; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.search.relevance.RelevanceHit; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchRequestInfo; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchResults; -import com.twitter.search.earlybird.search.relevance.scoring.ScoringFunction; -import com.twitter.search.earlybird.stats.EarlybirdSearcherStats; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -/** - * RelevanceTopCollector is a results collector that collects the top numResults by - * score, filtering out duplicates. - */ -public class RelevanceTopCollector extends AbstractRelevanceCollector { - // Search results are collected in a min-heap. - protected final RandomAccessPriorityQueue minQueue; - - // Number of hits actually added to the min queue after dupe filtering and skipping. - // Less than or equal to numHitsProcessed. - protected int numHitsCollected; - - // The 'top' of the min heap, or, the lowest scored document in the heap. - private RelevanceHit pqTop; - private float lowestScore = ScoringFunction.SKIP_HIT; - - private final boolean isFilterDupes; - - public RelevanceTopCollector( - ImmutableSchemaInterface schema, - RelevanceSearchRequestInfo searchRequestInfo, - ScoringFunction scoringFunction, - EarlybirdSearcherStats searcherStats, - EarlybirdCluster cluster, - UserTable userTable, - Clock clock, - int requestDebugMode) { - super(schema, searchRequestInfo, scoringFunction, searcherStats, cluster, userTable, clock, - requestDebugMode); - this.minQueue = new RandomAccessPriorityQueue( - searchRequestInfo.getNumResultsRequested(), RelevanceHit.PQ_COMPARATOR_BY_SCORE) { - @Override - protected RelevanceHit getSentinelObject() { - return new RelevanceHit(); // default relevance constructor would create a hit with the - // lowest score possible. - } - }; - this.pqTop = minQueue.top(); - this.isFilterDupes = getSearchRequestInfo().getRelevanceOptions().isFilterDups(); - } - - protected void collectWithScoreInternal( - long tweetID, - long timeSliceID, - float score, - ThriftSearchResultMetadata metadata) { - // This collector cannot handle these scores: - assert !Float.isNaN(score); - - if (score <= lowestScore) { - // Since docs are returned in-order (i.e., increasing doc Id), a document - // with equal score to pqTop.score cannot compete since HitQueue favors - // documents with lower doc Ids. Therefore reject those docs too. - // IMPORTANT: docs skipped by the scoring function will have scores set - // to ScoringFunction.SKIP_HIT, meaning they will not be collected. - return; - } - - boolean dupFound = false; - Preconditions.checkState(metadata.isSetSignature(), - "The signature should be set at metadata collection time, but it is null. " - + "Tweet id = %s, metadata = %s", - tweetID, - metadata); - int signatureInt = metadata.getSignature(); - final TweetIntegerShingleSignature signature = - TweetIntegerShingleSignature.deserialize(signatureInt); - - if (isFilterDupes) { - // update duplicate if any - if (signatureInt != TweetIntegerShingleSignature.DEFAULT_NO_SIGNATURE) { - dupFound = minQueue.incrementElement( - signature, - element -> { - if (score > element.getScore()) { - element.update(timeSliceID, tweetID, signature, metadata); - } - } - ); - } - } - - if (!dupFound) { - numHitsCollected++; - - // if we didn't find a duplicate element to update then we add it now as a new element to the - // pq - pqTop = minQueue.updateTop(top -> top.update(timeSliceID, tweetID, signature, metadata)); - - lowestScore = pqTop.getScore(); - } - } - - @Override - protected void doCollectWithScore(final long tweetID, final float score) throws IOException { - ThriftSearchResultMetadata metadata = collectMetadata(); - scoringFunction.populateResultMetadataBasedOnScoringData( - searchRequestInfo.getSearchQuery().getResultMetadataOptions(), - metadata, - scoringFunction.getScoringDataForCurrentDocument()); - collectWithScoreInternal(tweetID, currTimeSliceID, score, metadata); - } - - @Override - public EarlyTerminationState innerShouldCollectMore() { - // Note that numHitsCollected here might be less than num results collected in the - // TwitterEarlyTerminationCollector, if we hit dups or there are very low scores. - if (numHitsCollected >= getMaxHitsToProcess()) { - return setEarlyTerminationState(EarlyTerminationState.TERMINATED_MAX_HITS_EXCEEDED); - } - return EarlyTerminationState.COLLECTING; - } - - @Override - protected RelevanceSearchResults doGetRelevanceResults() throws IOException { - return getRelevanceResultsInternal(); - } - - protected RelevanceSearchResults getRelevanceResultsInternal() { - return resultsFromQueue(minQueue, getSearchRequestInfo().getNumResultsRequested(), - getRelevanceStats()); - } - - private static RelevanceSearchResults resultsFromQueue( - RandomAccessPriorityQueue pq, - int desiredNumResults, - ThriftSearchResultsRelevanceStats relevanceStats) { - // trim first in case we didn't fill up the queue to not get any sentinel values here - int numResults = pq.trim(); - if (numResults > desiredNumResults) { - for (int i = 0; i < numResults - desiredNumResults; i++) { - pq.pop(); - } - numResults = desiredNumResults; - } - RelevanceSearchResults results = new RelevanceSearchResults(numResults); - // insert hits in decreasing order by score - for (int i = numResults - 1; i >= 0; i--) { - RelevanceHit hit = pq.pop(); - results.setHit(hit, i); - } - results.setRelevanceStats(relevanceStats); - results.setNumHits(numResults); - return results; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/BatchHit.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/BatchHit.docx new file mode 100644 index 000000000..15714b70e Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/BatchHit.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/BatchHit.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/BatchHit.java deleted file mode 100644 index ed42bf319..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/BatchHit.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import com.twitter.search.common.features.thrift.ThriftSearchResultFeatures; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; - -public class BatchHit { - private final LinearScoringData scoringData; - private final ThriftSearchResultFeatures features; - private final ThriftSearchResultMetadata metadata; - private final long tweetID; - private final long timeSliceID; - - public BatchHit( - LinearScoringData scoringData, - ThriftSearchResultFeatures features, - ThriftSearchResultMetadata metadata, - long tweetID, - long timeSliceID - ) { - this.scoringData = scoringData; - this.features = features; - this.metadata = metadata; - this.tweetID = tweetID; - this.timeSliceID = timeSliceID; - } - - public LinearScoringData getScoringData() { - return scoringData; - } - - public ThriftSearchResultFeatures getFeatures() { - return features; - } - - public ThriftSearchResultMetadata getMetadata() { - return metadata; - } - - public long getTweetID() { - return tweetID; - } - - public long getTimeSliceID() { - return timeSliceID; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/DefaultScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/DefaultScoringFunction.docx new file mode 100644 index 000000000..442903c06 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/DefaultScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/DefaultScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/DefaultScoringFunction.java deleted file mode 100644 index ead0078d8..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/DefaultScoringFunction.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import org.apache.lucene.search.Explanation; - -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -/* - * A sample scorer, doesn't really do anything, returns the same score for every document. - */ -public class DefaultScoringFunction extends ScoringFunction { - private float score; - - public DefaultScoringFunction(ImmutableSchemaInterface schema) { - super(schema); - } - - @Override - protected float score(float luceneQueryScore) { - score = luceneQueryScore; - return luceneQueryScore; - } - - @Override - protected Explanation doExplain(float luceneScore) { - // just an example - this scoring function will go away soon - return Explanation.match(luceneScore, "luceneScore=" + luceneScore); - } - - @Override - public void updateRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats) { - relevanceStats.setNumScored(relevanceStats.getNumScored() + 1); - if (score == ScoringFunction.SKIP_HIT) { - relevanceStats.setNumSkipped(relevanceStats.getNumSkipped() + 1); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.docx new file mode 100644 index 000000000..00624849b Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.java deleted file mode 100644 index b6fe1f3ad..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/FeatureBasedScoringFunction.java +++ /dev/null @@ -1,1360 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.primitives.Ints; -import com.google.common.primitives.Longs; - -import org.apache.lucene.search.Explanation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.common_internal.bloomfilter.BloomFilter; -import com.twitter.search.common.constants.SearchCardType; -import com.twitter.search.common.constants.thriftjava.ThriftLanguage; -import com.twitter.search.common.database.DatabaseConfig; -import com.twitter.search.common.features.ExternalTweetFeature; -import com.twitter.search.common.features.FeatureHandler; -import com.twitter.search.common.features.thrift.ThriftSearchFeatureSchemaEntry; -import com.twitter.search.common.features.thrift.ThriftSearchFeatureType; -import com.twitter.search.common.features.thrift.ThriftSearchResultFeatures; -import com.twitter.search.common.query.QueryCommonFieldHitsVisitor; -import com.twitter.search.common.ranking.thriftjava.ThriftRankingParams; -import com.twitter.search.common.relevance.features.AgeDecay; -import com.twitter.search.common.relevance.features.RelevanceSignalConstants; -import com.twitter.search.common.relevance.text.VisibleTokenRatioNormalizer; -import com.twitter.search.common.results.thriftjava.FieldHitList; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.common.util.LongIntConverter; -import com.twitter.search.common.util.lang.ThriftLanguageUtil; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.search.AntiGamingFilter; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.search.relevance.LinearScoringData.SkipReason; -import com.twitter.search.earlybird.search.relevance.LinearScoringParams; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchResultExtraMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; -import com.twitter.search.earlybird.thrift.ThriftSocialFilterType; - -/** - * Base class for scoring functions that rely on the extracted features stored in LinearScoringData. - * - * Extensions of this class must implement 2 methods: - * - * - computeScore - * - generateExplanationForScoring - * - * They are called for scoring and generating the debug information of the document that it's - * currently being evaluated. The field 'data' holds the features of the document. - */ -public abstract class FeatureBasedScoringFunction extends ScoringFunction { - private static final Logger LOG = LoggerFactory.getLogger(FeatureBasedScoringFunction.class); - - // A multiplier that's applied to all scores to avoid scores too low. - public static final float SCORE_ADJUSTER = 100.0f; - - private static final VisibleTokenRatioNormalizer VISIBLE_TOKEN_RATIO_NORMALIZER = - VisibleTokenRatioNormalizer.createInstance(); - - // Allow default values only for numeric types. - private static final Set ALLOWED_TYPES_FOR_DEFAULT_FEATURE_VALUES = - EnumSet.of(ThriftSearchFeatureType.INT32_VALUE, - ThriftSearchFeatureType.LONG_VALUE, - ThriftSearchFeatureType.DOUBLE_VALUE); - - private static final Set NUMERIC_FEATURES_FOR_WHICH_DEFAULTS_SHOULD_NOT_BE_SET = - ImmutableSet.of(EarlybirdFieldConstant.TWEET_SIGNATURE.getFieldId(), - EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_LEAST_SIGNIFICANT_INT.getFieldId(), - EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_MOST_SIGNIFICANT_INT.getFieldId()); - - // Name of the scoring function. Used for generating explanations. - private final String functionName; - - private final BloomFilter trustedFilter; - private final BloomFilter followFilter; - - // Current timestamp in seconds. Overridable by unit test or by timestamp set in search query. - private int now; - - private final AntiGamingFilter antiGamingFilter; - - @Nullable - private final AgeDecay ageDecay; - - protected final LinearScoringParams params; // Parameters and query-dependent values. - - // In order for the API calls to retrieve the correct `LinearScoringData` - // for the passed `docId`, we need to maintain a map of `docId` -> `LinearScoringData` - // NOTE: THIS CAN ONLY BE REFERENCED AT HIT COLLECTION TIME, SINCE DOC IDS ARE NOT UNIQUE - // ACROSS SEGMENTS. IT'S NOT USABLE DURING BATCH SCORING. - private final Map docIdToScoringData; - - private final ThriftSearchResultType searchResultType; - - private final UserTable userTable; - - @VisibleForTesting - void setNow(int fakeNow) { - now = fakeNow; - } - - public FeatureBasedScoringFunction( - String functionName, - ImmutableSchemaInterface schema, - ThriftSearchQuery searchQuery, - AntiGamingFilter antiGamingFilter, - ThriftSearchResultType searchResultType, - UserTable userTable) throws IOException { - super(schema); - - this.functionName = functionName; - this.searchResultType = searchResultType; - this.userTable = userTable; - - Preconditions.checkNotNull(searchQuery.getRelevanceOptions()); - ThriftRankingParams rankingParams = searchQuery.getRelevanceOptions().getRankingParams(); - Preconditions.checkNotNull(rankingParams); - - params = new LinearScoringParams(searchQuery, rankingParams); - docIdToScoringData = new HashMap<>(); - - long timestamp = searchQuery.isSetTimestampMsecs() && searchQuery.getTimestampMsecs() > 0 - ? searchQuery.getTimestampMsecs() : System.currentTimeMillis(); - now = Ints.checkedCast(TimeUnit.MILLISECONDS.toSeconds(timestamp)); - - this.antiGamingFilter = antiGamingFilter; - - this.ageDecay = params.useAgeDecay - ? new AgeDecay(params.ageDecayBase, params.ageDecayHalflife, params.ageDecaySlope) - : null; - - if (searchQuery.isSetTrustedFilter()) { - trustedFilter = new BloomFilter(searchQuery.getTrustedFilter()); - } else { - trustedFilter = null; - } - - if (searchQuery.isSetDirectFollowFilter()) { - followFilter = new BloomFilter(searchQuery.getDirectFollowFilter()); - } else { - followFilter = null; - } - } - - @VisibleForTesting - final LinearScoringParams getScoringParams() { - return params; - } - - /** - * Returns the LinearScoringData instance associated with the current doc ID. If it doesn't exist, - * an empty LinearScoringData is created. - */ - @Override - public LinearScoringData getScoringDataForCurrentDocument() { - LinearScoringData data = docIdToScoringData.get(getCurrentDocID()); - if (data == null) { - data = new LinearScoringData(); - docIdToScoringData.put(getCurrentDocID(), data); - } - return data; - } - - @Override - public void setDebugMode(int debugMode) { - super.setDebugMode(debugMode); - } - - /** - * Normal the lucene score, which was unbounded, to a range of [1.0, maxLuceneScoreBoost]. - * The normalized value increases almost linearly in the lucene score range 2.0 ~ 7.0, where - * most queries fall in. For rare long tail queries, like some hashtags, they have high idf and - * thus high lucene score, the normalized value won't have much difference between tweets. - * The normalization function is: - * ls = luceneScore - * norm = min(max, 1 + (max - 1.0) / 2.4 * ln(1 + ls) - */ - static float normalizeLuceneScore(float luceneScore, float maxBoost) { - return (float) Math.min(maxBoost, 1.0 + (maxBoost - 1.0) / 2.4 * Math.log1p(luceneScore)); - } - - @Override - protected float score(float luceneQueryScore) throws IOException { - return scoreInternal(luceneQueryScore, null); - } - - protected LinearScoringData updateLinearScoringData(float luceneQueryScore) throws IOException { - // Reset the data for each tweet!!! - LinearScoringData data = new LinearScoringData(); - docIdToScoringData.put(getCurrentDocID(), data); - - // Set proper version for engagement counters for this request. - data.skipReason = SkipReason.NOT_SKIPPED; - data.luceneScore = luceneQueryScore; - data.userRep = (byte) documentFeatures.getFeatureValue(EarlybirdFieldConstant.USER_REPUTATION); - - if (antiGamingFilter != null && !antiGamingFilter.accept(getCurrentDocID())) { - data.skipReason = SkipReason.ANTIGAMING; - return data; - } - - data.textScore = (byte) documentFeatures.getFeatureValue(EarlybirdFieldConstant.TEXT_SCORE); - data.tokenAt140DividedByNumTokensBucket = VISIBLE_TOKEN_RATIO_NORMALIZER.denormalize( - (byte) documentFeatures.getFeatureValue(EarlybirdFieldConstant.VISIBLE_TOKEN_RATIO)); - data.fromUserId = documentFeatures.getFeatureValue(EarlybirdFieldConstant.FROM_USER_ID_CSF); - data.isFollow = followFilter != null - && followFilter.contains(Longs.toByteArray(data.fromUserId)); - data.isTrusted = trustedFilter != null - && trustedFilter.contains(Longs.toByteArray(data.fromUserId)); - data.isFromVerifiedAccount = documentFeatures.isFlagSet( - EarlybirdFieldConstant.FROM_VERIFIED_ACCOUNT_FLAG); - data.isFromBlueVerifiedAccount = documentFeatures.isFlagSet( - EarlybirdFieldConstant.FROM_BLUE_VERIFIED_ACCOUNT_FLAG); - data.isSelfTweet = data.fromUserId == params.searcherId; - // v1 engagement counters, note that the first three values are post-log2 version - // of the original unnormalized values. - data.retweetCountPostLog2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.RETWEET_COUNT); - data.replyCountPostLog2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.REPLY_COUNT); - data.favCountPostLog2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.FAVORITE_COUNT); - data.embedsImpressionCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EMBEDS_IMPRESSION_COUNT); - data.embedsUrlCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EMBEDS_URL_COUNT); - data.videoViewCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.VIDEO_VIEW_COUNT); - // v2 engagement counters - data.retweetCountV2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.RETWEET_COUNT_V2); - data.replyCountV2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.REPLY_COUNT_V2); - data.favCountV2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.FAVORITE_COUNT_V2); - // other v2 engagement counters - data.embedsImpressionCountV2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EMBEDS_IMPRESSION_COUNT_V2); - data.embedsUrlCountV2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EMBEDS_URL_COUNT_V2); - data.videoViewCountV2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.VIDEO_VIEW_COUNT_V2); - // pure v2 engagement counters without v1 counterpart - data.quotedCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.QUOTE_COUNT); - data.weightedRetweetCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.WEIGHTED_RETWEET_COUNT); - data.weightedReplyCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.WEIGHTED_REPLY_COUNT); - data.weightedFavCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.WEIGHTED_FAVORITE_COUNT); - data.weightedQuoteCount = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.WEIGHTED_QUOTE_COUNT); - - Double querySpecificScoreAdjustment = params.querySpecificScoreAdjustments == null ? null - : params.querySpecificScoreAdjustments.get(tweetIDMapper.getTweetID(getCurrentDocID())); - data.querySpecificScore = - querySpecificScoreAdjustment == null ? 0.0 : querySpecificScoreAdjustment; - - data.authorSpecificScore = params.authorSpecificScoreAdjustments == null - ? 0.0 - : params.authorSpecificScoreAdjustments.getOrDefault(data.fromUserId, 0.0); - - // respect social filter type - if (params.socialFilterType != null && !data.isSelfTweet) { - if ((params.socialFilterType == ThriftSocialFilterType.ALL - && !data.isFollow && !data.isTrusted) - || (params.socialFilterType == ThriftSocialFilterType.TRUSTED && !data.isTrusted) - || (params.socialFilterType == ThriftSocialFilterType.FOLLOWS && !data.isFollow)) { - // we can skip this hit as we only want social results in this mode. - data.skipReason = SkipReason.SOCIAL_FILTER; - return data; - } - } - - // 1. first apply all the filters to only non-follow tweets and non-verified accounts, - // but be tender to sentinel values - // unless you specifically asked to apply filters regardless - if (params.applyFiltersAlways - || (!data.isSelfTweet && !data.isFollow && !data.isFromVerifiedAccount - && !data.isFromBlueVerifiedAccount)) { - if (data.userRep < params.reputationMinVal - // don't filter unset userreps, we give them the benefit of doubt and let it - // continue to scoring. userrep is unset when either user just signed up or - // during ingestion time we had trouble getting userrep from reputation service. - && data.userRep != RelevanceSignalConstants.UNSET_REPUTATION_SENTINEL) { - data.skipReason = SkipReason.LOW_REPUTATION; - return data; - } else if (data.textScore < params.textScoreMinVal - // don't filter unset text scores, use goodwill value - && data.textScore != RelevanceSignalConstants.UNSET_TEXT_SCORE_SENTINEL) { - data.skipReason = SkipReason.LOW_TEXT_SCORE; - return data; - } else if (data.retweetCountPostLog2 != LinearScoringData.UNSET_SIGNAL_VALUE - && data.retweetCountPostLog2 < params.retweetMinVal) { - data.skipReason = SkipReason.LOW_RETWEET_COUNT; - return data; - } else if (data.favCountPostLog2 != LinearScoringData.UNSET_SIGNAL_VALUE - && data.favCountPostLog2 < params.favMinVal) { - data.skipReason = SkipReason.LOW_FAV_COUNT; - return data; - } - } - - // if sentinel value is set, assume goodwill score and let scoring continue. - if (data.textScore == RelevanceSignalConstants.UNSET_TEXT_SCORE_SENTINEL) { - data.textScore = RelevanceSignalConstants.GOODWILL_TEXT_SCORE; - } - if (data.userRep == RelevanceSignalConstants.UNSET_REPUTATION_SENTINEL) { - data.userRep = RelevanceSignalConstants.GOODWILL_REPUTATION; - } - - data.tweetAgeInSeconds = now - timeMapper.getTime(getCurrentDocID()); - if (data.tweetAgeInSeconds < 0) { - data.tweetAgeInSeconds = 0; // Age cannot be negative - } - - // The PARUS_SCORE feature should be read as is. - data.parusScore = documentFeatures.getFeatureValue(EarlybirdFieldConstant.PARUS_SCORE); - - data.isNullcast = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_NULLCAST_FLAG); - data.hasUrl = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_LINK_FLAG); - data.hasImageUrl = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_IMAGE_URL_FLAG); - data.hasVideoUrl = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_VIDEO_URL_FLAG); - data.hasNewsUrl = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_NEWS_URL_FLAG); - data.isReply = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_REPLY_FLAG); - data.isRetweet = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_RETWEET_FLAG); - data.isOffensive = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_OFFENSIVE_FLAG); - data.hasTrend = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_TREND_FLAG); - data.hasMultipleHashtagsOrTrends = - documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_MULTIPLE_HASHTAGS_OR_TRENDS_FLAG); - data.isUserSpam = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_USER_SPAM_FLAG); - data.isUserNSFW = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_USER_NSFW_FLAG) - || userTable.isSet(data.fromUserId, UserTable.NSFW_BIT); - data.isUserAntiSocial = - userTable.isSet(data.fromUserId, UserTable.ANTISOCIAL_BIT); - data.isUserBot = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_USER_BOT_FLAG); - data.hasCard = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_CARD_FLAG); - data.cardType = SearchCardType.UNKNOWN.getByteValue(); - if (data.hasCard) { - data.cardType = - (byte) documentFeatures.getFeatureValue(EarlybirdFieldConstant.CARD_TYPE_CSF_FIELD); - } - data.hasVisibleLink = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_VISIBLE_LINK_FLAG); - - data.hasConsumerVideo = - documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_CONSUMER_VIDEO_FLAG); - data.hasProVideo = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_PRO_VIDEO_FLAG); - data.hasVine = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_VINE_FLAG); - data.hasPeriscope = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_PERISCOPE_FLAG); - data.hasNativeImage = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_NATIVE_IMAGE_FLAG); - data.hasQuote = documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_QUOTE_FLAG); - data.isComposerSourceCamera = - documentFeatures.isFlagSet(EarlybirdFieldConstant.COMPOSER_SOURCE_IS_CAMERA_FLAG); - - // Only read the shared status if the isRetweet or isReply bit is true (minor optimization). - if (data.isRetweet || (params.getInReplyToStatusId && data.isReply)) { - data.sharedStatusId = - documentFeatures.getFeatureValue(EarlybirdFieldConstant.SHARED_STATUS_ID_CSF); - } - - // Only read the reference tweet author ID if the isRetweet or isReply bit - // is true (minor optimization). - if (data.isRetweet || data.isReply) { - // the REFERENCE_AUTHOR_ID_CSF stores the source tweet author id for all retweets - long referenceAuthorId = - documentFeatures.getFeatureValue(EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_CSF); - if (referenceAuthorId > 0) { - data.referenceAuthorId = referenceAuthorId; - } else { - // we also store the reference author id for retweets, directed at tweets, and self threaded - // tweets separately on Realtime/Protected Earlybirds. This data will be moved to the - // REFERENCE_AUTHOR_ID_CSF and these fields will be deprecated in SEARCH-34958. - referenceAuthorId = LongIntConverter.convertTwoIntToOneLong( - (int) documentFeatures.getFeatureValue( - EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_MOST_SIGNIFICANT_INT), - (int) documentFeatures.getFeatureValue( - EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_LEAST_SIGNIFICANT_INT)); - if (referenceAuthorId > 0) { - data.referenceAuthorId = referenceAuthorId; - } - } - } - - // Convert language to a thrift language and then back to an int in order to - // ensure a value compatible with our current ThriftLanguage definition. - ThriftLanguage tweetLang = ThriftLanguageUtil.safeFindByValue( - (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.LANGUAGE)); - data.tweetLangId = tweetLang.getValue(); - // Set the language-related features here so that they can be later used in promotion/demotion - // and also be transferred to ThriftSearchResultMetadata - data.userLangMult = computeUserLangMultiplier(data, params); - data.hasDifferentLang = params.uiLangId != ThriftLanguage.UNKNOWN.getValue() - && params.uiLangId != data.tweetLangId; - data.hasEnglishTweetAndDifferentUILang = data.hasDifferentLang - && data.tweetLangId == ThriftLanguage.ENGLISH.getValue(); - data.hasEnglishUIAndDifferentTweetLang = data.hasDifferentLang - && params.uiLangId == ThriftLanguage.ENGLISH.getValue(); - - // Exposed all these features for the clients. - data.isSensitiveContent = - documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_SENSITIVE_CONTENT); - data.hasMultipleMediaFlag = - documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_MULTIPLE_MEDIA_FLAG); - data.profileIsEggFlag = documentFeatures.isFlagSet(EarlybirdFieldConstant.PROFILE_IS_EGG_FLAG); - data.isUserNewFlag = documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_USER_NEW_FLAG); - data.numMentions = (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.NUM_MENTIONS); - data.numHashtags = (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.NUM_HASHTAGS); - data.linkLanguage = - (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.LINK_LANGUAGE); - data.prevUserTweetEngagement = - (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.PREV_USER_TWEET_ENGAGEMENT); - - // health model scores by HML - data.toxicityScore = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.TOXICITY_SCORE); - data.pBlockScore = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.PBLOCK_SCORE); - data.pSpammyTweetScore = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.P_SPAMMY_TWEET_SCORE); - data.pReportedTweetScore = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.P_REPORTED_TWEET_SCORE); - data.spammyTweetContentScore = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.SPAMMY_TWEET_CONTENT_SCORE - ); - data.experimentalHealthModelScore1 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EXPERIMENTAL_HEALTH_MODEL_SCORE_1); - data.experimentalHealthModelScore2 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EXPERIMENTAL_HEALTH_MODEL_SCORE_2); - data.experimentalHealthModelScore3 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EXPERIMENTAL_HEALTH_MODEL_SCORE_3); - data.experimentalHealthModelScore4 = documentFeatures.getUnnormalizedFeatureValue( - EarlybirdFieldConstant.EXPERIMENTAL_HEALTH_MODEL_SCORE_4); - - return data; - } - - protected float scoreInternal( - float luceneQueryScore, ExplanationWrapper explanation) throws IOException { - LinearScoringData data = updateLinearScoringData(luceneQueryScore); - if (data.skipReason != null && data.skipReason != SkipReason.NOT_SKIPPED) { - return finalizeScore(data, explanation, SKIP_HIT); - } - - double score = computeScore(data, explanation != null); - return postScoreComputation(data, score, true, explanation); - } - - protected float postScoreComputation( - LinearScoringData data, - double score, - boolean boostScoreWithHitAttribution, - ExplanationWrapper explanation) throws IOException { - double modifiedScore = score; - data.scoreBeforeBoost = modifiedScore; - if (params.applyBoosts) { - modifiedScore = - applyBoosts(data, modifiedScore, boostScoreWithHitAttribution, explanation != null); - } - // Final adjustment to avoid too-low scores. - modifiedScore *= SCORE_ADJUSTER; - data.scoreAfterBoost = modifiedScore; - - // 3. final score filter - data.scoreFinal = modifiedScore; - if ((params.applyFiltersAlways || (!data.isSelfTweet && !data.isFollow)) - && modifiedScore < params.minScore) { - data.skipReason = SkipReason.LOW_FINAL_SCORE; - modifiedScore = SKIP_HIT; - } - - // clear field hits - this.fieldHitAttribution = null; - return finalizeScore(data, explanation, modifiedScore); - } - - /** - * Applying promotion/demotion to the scores generated by feature-based scoring functions - * - * @param data Original LinearScoringData (to be modified with boosts here) - * @param score Score generated by the feature-based scoring function - * @param withHitAttribution Determines if hit attribution data should be included. - * @param forExplanation Indicates if the score will be computed for generating the explanation. - * @return Score after applying promotion/demotion - */ - private double applyBoosts( - LinearScoringData data, - double score, - boolean withHitAttribution, - boolean forExplanation) { - double boostedScore = score; - - if (params.useLuceneScoreAsBoost) { - data.normalizedLuceneScore = normalizeLuceneScore( - (float) data.luceneScore, (float) params.maxLuceneScoreBoost); - boostedScore *= data.normalizedLuceneScore; - } - if (data.isOffensive) { - boostedScore *= params.offensiveDamping; - } - if (data.isUserSpam && params.spamUserDamping != LinearScoringData.NO_BOOST_VALUE) { - data.spamUserDampApplied = true; - boostedScore *= params.spamUserDamping; - } - if (data.isUserNSFW && params.nsfwUserDamping != LinearScoringData.NO_BOOST_VALUE) { - data.nsfwUserDampApplied = true; - boostedScore *= params.nsfwUserDamping; - } - if (data.isUserBot && params.botUserDamping != LinearScoringData.NO_BOOST_VALUE) { - data.botUserDampApplied = true; - boostedScore *= params.botUserDamping; - } - - // cards - if (data.hasCard && params.hasCardBoosts[data.cardType] != LinearScoringData.NO_BOOST_VALUE) { - boostedScore *= params.hasCardBoosts[data.cardType]; - data.hasCardBoostApplied = true; - } - - // trends - if (data.hasMultipleHashtagsOrTrends) { - boostedScore *= params.multipleHashtagsOrTrendsDamping; - } else if (data.hasTrend) { - data.tweetHasTrendsBoostApplied = true; - boostedScore *= params.tweetHasTrendBoost; - } - - // Media/News url boosts. - if (data.hasImageUrl || data.hasVideoUrl) { - data.hasMedialUrlBoostApplied = true; - boostedScore *= params.tweetHasMediaUrlBoost; - } - if (data.hasNewsUrl) { - data.hasNewsUrlBoostApplied = true; - boostedScore *= params.tweetHasNewsUrlBoost; - } - - if (data.isFromVerifiedAccount) { - data.tweetFromVerifiedAccountBoostApplied = true; - boostedScore *= params.tweetFromVerifiedAccountBoost; - } - - if (data.isFromBlueVerifiedAccount) { - data.tweetFromBlueVerifiedAccountBoostApplied = true; - boostedScore *= params.tweetFromBlueVerifiedAccountBoost; - } - - if (data.isFollow) { - // direct follow, so boost both replies and non-replies. - data.directFollowBoostApplied = true; - boostedScore *= params.directFollowBoost; - } else if (data.isTrusted) { - // trusted circle - if (!data.isReply) { - // non-at-reply, in trusted network - data.trustedCircleBoostApplied = true; - boostedScore *= params.trustedCircleBoost; - } - } else if (data.isReply) { - // at-reply out of my network - data.outOfNetworkReplyPenaltyApplied = true; - boostedScore -= params.outOfNetworkReplyPenalty; - } - - if (data.isSelfTweet) { - data.selfTweetBoostApplied = true; - data.selfTweetMult = params.selfTweetBoost; - boostedScore *= params.selfTweetBoost; - } - - // Language Demotion - // User language based demotion - // The data.userLangMult is set in scoreInternal(), and this setting step is always before - // the applying boosts step - if (params.useUserLanguageInfo) { - boostedScore *= data.userLangMult; - } - // UI language based demotion - if (params.uiLangId != ThriftLanguage.UNKNOWN.getValue() - && params.uiLangId != data.tweetLangId) { - if (data.tweetLangId == ThriftLanguage.ENGLISH.getValue()) { - data.uiLangMult = params.langEnglishTweetDemote; - } else if (params.uiLangId == ThriftLanguage.ENGLISH.getValue()) { - data.uiLangMult = params.langEnglishUIDemote; - } else { - data.uiLangMult = params.langDefaultDemote; - } - } else { - data.uiLangMult = LinearScoringData.NO_BOOST_VALUE; - } - boostedScore *= data.uiLangMult; - - if (params.useAgeDecay) { - // shallow sigmoid with an inflection point at ageDecayHalflife - data.ageDecayMult = ageDecay.getAgeDecayMultiplier(data.tweetAgeInSeconds); - boostedScore *= data.ageDecayMult; - } - - // Hit Attribute Demotion - // Scoring is currently based on tokenized user name, text, and url in the tweet - // If hit attribute collection is enabled, we demote score based on these fields - if (hitAttributeHelper != null && params.enableHitDemotion) { - - Map> hitMap; - if (forExplanation && fieldHitAttribution != null) { - // if this scoring call is for generating an explanation, - // we'll use the fieldHitAttribution found in the search result's metadata because - // collectors are not called during the debug workflow - hitMap = Maps.transformValues(fieldHitAttribution.getHitMap(), FieldHitList::getHitFields); - } else if (withHitAttribution) { - hitMap = hitAttributeHelper.getHitAttribution(getCurrentDocID()); - } else { - hitMap = Maps.newHashMap(); - } - Set uniqueFieldHits = ImmutableSet.copyOf(Iterables.concat(hitMap.values())); - - data.hitFields.addAll(uniqueFieldHits); - // there should always be fields that are hit - // if there aren't, we assume this is a call from 'explain' in debug mode - // do not override hit attribute data if in debug mode - if (!uniqueFieldHits.isEmpty()) { - // demotions based strictly on field hits - if (uniqueFieldHits.size() == 1) { - if (uniqueFieldHits.contains( - EarlybirdFieldConstant.RESOLVED_LINKS_TEXT_FIELD.getFieldName())) { - // if url was the only field that was hit, demote - data.hasUrlOnlyHitDemotionApplied = true; - boostedScore *= params.urlOnlyHitDemotion; - } else if (uniqueFieldHits.contains( - EarlybirdFieldConstant.TOKENIZED_FROM_USER_FIELD.getFieldName())) { - // if name was the only field that was hit, demote - data.hasNameOnlyHitDemotionApplied = true; - boostedScore *= params.nameOnlyHitDemotion; - } - } else if (!uniqueFieldHits.contains(EarlybirdFieldConstant.TEXT_FIELD.getFieldName()) - && !uniqueFieldHits.contains(EarlybirdFieldConstant.MENTIONS_FIELD.getFieldName()) - && !uniqueFieldHits.contains(EarlybirdFieldConstant.HASHTAGS_FIELD.getFieldName()) - && !uniqueFieldHits.contains(EarlybirdFieldConstant.STOCKS_FIELD.getFieldName())) { - // if text or special text was never hit, demote - data.hasNoTextHitDemotionApplied = true; - boostedScore *= params.noTextHitDemotion; - } else if (uniqueFieldHits.size() == 2) { - // demotions based on field hit combinations - // want to demote if we only hit two of the fields (one being text) - // but with separate terms - Set fieldIntersections = QueryCommonFieldHitsVisitor.findIntersection( - hitAttributeHelper.getNodeToRankMap(), - hitMap, - query); - - if (fieldIntersections.isEmpty()) { - if (uniqueFieldHits.contains( - EarlybirdFieldConstant.TOKENIZED_FROM_USER_FIELD.getFieldName())) { - // if name is hit but has no hits in common with text, demote - // want to demote cases where we hit part of the person's name - // and tweet text separately - data.hasSeparateTextAndNameHitDemotionApplied = true; - boostedScore *= params.separateTextAndNameHitDemotion; - } else if (uniqueFieldHits.contains( - EarlybirdFieldConstant.RESOLVED_LINKS_TEXT_FIELD.getFieldName())) { - // if url is hit but has no hits in common with text, demote - // want to demote cases where we hit a potential domain keyword - // and tweet text separately - data.hasSeparateTextAndUrlHitDemotionApplied = true; - boostedScore *= params.separateTextAndUrlHitDemotion; - } - } - } - } - } - - return boostedScore; - } - - /** - * Compute the user language based demotion multiplier - */ - private static double computeUserLangMultiplier( - LinearScoringData data, LinearScoringParams params) { - if (data.tweetLangId == params.uiLangId - && data.tweetLangId != ThriftLanguage.UNKNOWN.getValue()) { - // Effectively the uiLang is considered a language that user knows with 1.0 confidence. - return LinearScoringData.NO_BOOST_VALUE; - } - - if (params.userLangs[data.tweetLangId] > 0.0) { - return params.userLangs[data.tweetLangId]; - } - - return params.unknownLanguageBoost; - } - - /** - * Computes the score of the document that it's currently being evaluated. - * - * The extracted features from the document are available in the field 'data'. - * - * @param data The LinearScoringData instance that will store the document features. - * @param forExplanation Indicates if the score will be computed for generating the explanation. - */ - protected abstract double computeScore( - LinearScoringData data, boolean forExplanation) throws IOException; - - private float finalizeScore( - LinearScoringData scoringData, - ExplanationWrapper explanation, - double score) throws IOException { - scoringData.scoreReturned = score; - if (explanation != null) { - explanation.explanation = generateExplanation(scoringData); - } - return (float) score; - } - - @Override - protected void initializeNextSegment(EarlybirdIndexSegmentAtomicReader reader) - throws IOException { - if (antiGamingFilter != null) { - antiGamingFilter.startSegment(reader); - } - } - - /* - * Generate the scoring explanation for debug. - */ - private Explanation generateExplanation(LinearScoringData scoringData) throws IOException { - final List details = Lists.newArrayList(); - - details.add(Explanation.match(0.0f, "[PROPERTIES] " - + scoringData.getPropertyExplanation())); - - // 1. Filters - boolean isHit = scoringData.skipReason == SkipReason.NOT_SKIPPED; - if (scoringData.skipReason == SkipReason.ANTIGAMING) { - details.add(Explanation.noMatch("SKIPPED for antigaming")); - } - if (scoringData.skipReason == SkipReason.LOW_REPUTATION) { - details.add(Explanation.noMatch( - String.format("SKIPPED for low reputation: %.3f < %.3f", - scoringData.userRep, params.reputationMinVal))); - } - if (scoringData.skipReason == SkipReason.LOW_TEXT_SCORE) { - details.add(Explanation.noMatch( - String.format("SKIPPED for low text score: %.3f < %.3f", - scoringData.textScore, params.textScoreMinVal))); - } - if (scoringData.skipReason == SkipReason.LOW_RETWEET_COUNT) { - details.add(Explanation.noMatch( - String.format("SKIPPED for low retweet count: %.3f < %.3f", - scoringData.retweetCountPostLog2, params.retweetMinVal))); - } - if (scoringData.skipReason == SkipReason.LOW_FAV_COUNT) { - details.add(Explanation.noMatch( - String.format("SKIPPED for low fav count: %.3f < %.3f", - scoringData.favCountPostLog2, params.favMinVal))); - } - if (scoringData.skipReason == SkipReason.SOCIAL_FILTER) { - details.add(Explanation.noMatch("SKIPPED for not in the right social circle")); - } - - // 2. Explanation depending on the scoring type - generateExplanationForScoring(scoringData, isHit, details); - - // 3. Explanation depending on boosts - if (params.applyBoosts) { - generateExplanationForBoosts(scoringData, isHit, details); - } - - // 4. Final score filter. - if (scoringData.skipReason == SkipReason.LOW_FINAL_SCORE) { - details.add(Explanation.noMatch("SKIPPED for low final score: " + scoringData.scoreFinal)); - isHit = false; - } - - String hostAndSegment = String.format("%s host = %s segment = %s", - functionName, DatabaseConfig.getLocalHostname(), DatabaseConfig.getDatabase()); - if (isHit) { - return Explanation.match((float) scoringData.scoreFinal, hostAndSegment, details); - } else { - return Explanation.noMatch(hostAndSegment, details); - } - } - - /** - * Generates the explanation for the document that is currently being evaluated. - * - * Implementations of this method must use the 'details' parameter to collect its output. - * - * @param scoringData Scoring components for the document - * @param isHit Indicates whether the document is not skipped - * @param details Details of the explanation. Used to collect the output. - */ - protected abstract void generateExplanationForScoring( - LinearScoringData scoringData, boolean isHit, List details) throws IOException; - - /** - * Generates the boosts part of the explanation for the document that is currently - * being evaluated. - */ - private void generateExplanationForBoosts( - LinearScoringData scoringData, - boolean isHit, - List details) { - List boostDetails = Lists.newArrayList(); - - boostDetails.add(Explanation.match((float) scoringData.scoreBeforeBoost, "Score before boost")); - - - // Lucene score boost - if (params.useLuceneScoreAsBoost) { - boostDetails.add(Explanation.match( - (float) scoringData.normalizedLuceneScore, - String.format("[x] Lucene score boost, luceneScore=%.3f", - scoringData.luceneScore))); - } - - // card boost - if (scoringData.hasCardBoostApplied) { - boostDetails.add(Explanation.match((float) params.hasCardBoosts[scoringData.cardType], - "[x] card boost for type " + SearchCardType.cardTypeFromByteValue(scoringData.cardType))); - } - - // Offensive - if (scoringData.isOffensive) { - boostDetails.add(Explanation.match((float) params.offensiveDamping, "[x] Offensive damping")); - } else { - boostDetails.add(Explanation.match(LinearScoringData.NO_BOOST_VALUE, - String.format("Not Offensive, damping=%.3f", params.offensiveDamping))); - } - - // Spam - if (scoringData.spamUserDampApplied) { - boostDetails.add(Explanation.match((float) params.spamUserDamping, "[x] Spam")); - } - // NSFW - if (scoringData.nsfwUserDampApplied) { - boostDetails.add(Explanation.match((float) params.nsfwUserDamping, "[X] NSFW")); - } - // Bot - if (scoringData.botUserDampApplied) { - boostDetails.add(Explanation.match((float) params.botUserDamping, "[X] Bot")); - } - - // Multiple hashtags or trends - if (scoringData.hasMultipleHashtagsOrTrends) { - boostDetails.add(Explanation.match((float) params.multipleHashtagsOrTrendsDamping, - "[x] Multiple hashtags or trends boost")); - } else { - boostDetails.add(Explanation.match(LinearScoringData.NO_BOOST_VALUE, - String.format("No multiple hashtags or trends, damping=%.3f", - params.multipleHashtagsOrTrendsDamping))); - } - - if (scoringData.tweetHasTrendsBoostApplied) { - boostDetails.add(Explanation.match( - (float) params.tweetHasTrendBoost, "[x] Tweet has trend boost")); - } - - if (scoringData.hasMedialUrlBoostApplied) { - boostDetails.add(Explanation.match( - (float) params.tweetHasMediaUrlBoost, "[x] Media url boost")); - } - - if (scoringData.hasNewsUrlBoostApplied) { - boostDetails.add(Explanation.match( - (float) params.tweetHasNewsUrlBoost, "[x] News url boost")); - } - - boostDetails.add(Explanation.match(0.0f, "[FIELDS HIT] " + scoringData.hitFields)); - - if (scoringData.hasNoTextHitDemotionApplied) { - boostDetails.add(Explanation.match( - (float) params.noTextHitDemotion, "[x] No text hit demotion")); - } - - if (scoringData.hasUrlOnlyHitDemotionApplied) { - boostDetails.add(Explanation.match( - (float) params.urlOnlyHitDemotion, "[x] URL only hit demotion")); - } - - if (scoringData.hasNameOnlyHitDemotionApplied) { - boostDetails.add(Explanation.match( - (float) params.nameOnlyHitDemotion, "[x] Name only hit demotion")); - } - - if (scoringData.hasSeparateTextAndNameHitDemotionApplied) { - boostDetails.add(Explanation.match((float) params.separateTextAndNameHitDemotion, - "[x] Separate text/name demotion")); - } - - if (scoringData.hasSeparateTextAndUrlHitDemotionApplied) { - boostDetails.add(Explanation.match((float) params.separateTextAndUrlHitDemotion, - "[x] Separate text/url demotion")); - } - - if (scoringData.tweetFromVerifiedAccountBoostApplied) { - boostDetails.add(Explanation.match((float) params.tweetFromVerifiedAccountBoost, - "[x] Verified account boost")); - } - - if (scoringData.tweetFromBlueVerifiedAccountBoostApplied) { - boostDetails.add(Explanation.match((float) params.tweetFromBlueVerifiedAccountBoost, - "[x] Blue-verified account boost")); - } - - if (scoringData.selfTweetBoostApplied) { - boostDetails.add(Explanation.match((float) params.selfTweetBoost, - "[x] Self tweet boost")); - } - - if (scoringData.skipReason == LinearScoringData.SkipReason.SOCIAL_FILTER) { - boostDetails.add(Explanation.noMatch("SKIPPED for social filter")); - } else { - if (scoringData.directFollowBoostApplied) { - boostDetails.add(Explanation.match((float) params.directFollowBoost, - "[x] Direct follow boost")); - } - if (scoringData.trustedCircleBoostApplied) { - boostDetails.add(Explanation.match((float) params.trustedCircleBoost, - "[x] Trusted circle boost")); - } - if (scoringData.outOfNetworkReplyPenaltyApplied) { - boostDetails.add(Explanation.match((float) params.outOfNetworkReplyPenalty, - "[-] Out of network reply penalty")); - } - } - - // Language demotions - String langDetails = String.format( - "tweetLang=[%s] uiLang=[%s]", - ThriftLanguageUtil.getLocaleOf( - ThriftLanguage.findByValue(scoringData.tweetLangId)).getLanguage(), - ThriftLanguageUtil.getLocaleOf(ThriftLanguage.findByValue(params.uiLangId)).getLanguage()); - if (scoringData.uiLangMult == 1.0) { - boostDetails.add(Explanation.match( - LinearScoringData.NO_BOOST_VALUE, "No UI Language demotion: " + langDetails)); - } else { - boostDetails.add(Explanation.match( - (float) scoringData.uiLangMult, "[x] UI LangMult: " + langDetails)); - } - StringBuilder userLangDetails = new StringBuilder(); - userLangDetails.append("userLang=["); - for (int i = 0; i < params.userLangs.length; i++) { - if (params.userLangs[i] > 0.0) { - String lang = ThriftLanguageUtil.getLocaleOf(ThriftLanguage.findByValue(i)).getLanguage(); - userLangDetails.append(String.format("%s:%.3f,", lang, params.userLangs[i])); - } - } - userLangDetails.append("]"); - if (!params.useUserLanguageInfo) { - boostDetails.add(Explanation.noMatch( - "No User Language Demotion: " + userLangDetails.toString())); - } else { - boostDetails.add(Explanation.match( - (float) scoringData.userLangMult, - "[x] User LangMult: " + userLangDetails.toString())); - } - - // Age decay - String ageDecayDetails = String.format( - "age=%d seconds, slope=%.3f, base=%.1f, half-life=%.0f", - scoringData.tweetAgeInSeconds, params.ageDecaySlope, - params.ageDecayBase, params.ageDecayHalflife); - if (params.useAgeDecay) { - boostDetails.add(Explanation.match( - (float) scoringData.ageDecayMult, "[x] AgeDecay: " + ageDecayDetails)); - } else { - boostDetails.add(Explanation.match(1.0f, "Age decay disabled: " + ageDecayDetails)); - } - - // Score adjuster - boostDetails.add(Explanation.match(SCORE_ADJUSTER, "[x] score adjuster")); - - Explanation boostCombo = isHit - ? Explanation.match((float) scoringData.scoreAfterBoost, - "(MATCH) After Boosts and Demotions:", boostDetails) - : Explanation.noMatch("After Boosts and Demotions:", boostDetails); - - details.add(boostCombo); - } - - @Override - protected Explanation doExplain(float luceneQueryScore) throws IOException { - // Run the scorer again and get the explanation. - ExplanationWrapper explanation = new ExplanationWrapper(); - scoreInternal(luceneQueryScore, explanation); - return explanation.explanation; - } - - @Override - public void populateResultMetadataBasedOnScoringData( - ThriftSearchResultMetadataOptions options, - ThriftSearchResultMetadata metadata, - LinearScoringData data) throws IOException { - metadata.setResultType(searchResultType); - metadata.setScore(data.scoreReturned); - metadata.setFromUserId(data.fromUserId); - - if (data.isTrusted) { - metadata.setIsTrusted(true); - } - if (data.isFollow) { - metadata.setIsFollow(true); - } - if (data.skipReason != SkipReason.NOT_SKIPPED) { - metadata.setSkipped(true); - } - if ((data.isRetweet || (params.getInReplyToStatusId && data.isReply)) - && data.sharedStatusId != LinearScoringData.UNSET_SIGNAL_VALUE) { - metadata.setSharedStatusId(data.sharedStatusId); - } - if (data.hasCard) { - metadata.setCardType(data.cardType); - } - - // Optional features. Note: other optional metadata is populated by - // AbstractRelevanceCollector, not the scoring function. - - if (options.isGetLuceneScore()) { - metadata.setLuceneScore(data.luceneScore); - } - if (options.isGetReferencedTweetAuthorId() - && data.referenceAuthorId != LinearScoringData.UNSET_SIGNAL_VALUE) { - metadata.setReferencedTweetAuthorId(data.referenceAuthorId); - } - - if (options.isGetMediaBits()) { - metadata.setHasConsumerVideo(data.hasConsumerVideo); - metadata.setHasProVideo(data.hasProVideo); - metadata.setHasVine(data.hasVine); - metadata.setHasPeriscope(data.hasPeriscope); - boolean hasNativeVideo = - data.hasConsumerVideo || data.hasProVideo || data.hasVine || data.hasPeriscope; - metadata.setHasNativeVideo(hasNativeVideo); - metadata.setHasNativeImage(data.hasNativeImage); - } - - metadata - .setIsOffensive(data.isOffensive) - .setIsReply(data.isReply) - .setIsRetweet(data.isRetweet) - .setHasLink(data.hasUrl) - .setHasTrend(data.hasTrend) - .setHasMultipleHashtagsOrTrends(data.hasMultipleHashtagsOrTrends) - .setRetweetCount((int) data.retweetCountPostLog2) - .setFavCount((int) data.favCountPostLog2) - .setReplyCount((int) data.replyCountPostLog2) - .setEmbedsImpressionCount((int) data.embedsImpressionCount) - .setEmbedsUrlCount((int) data.embedsUrlCount) - .setVideoViewCount((int) data.videoViewCount) - .setResultType(searchResultType) - .setFromVerifiedAccount(data.isFromVerifiedAccount) - .setIsUserSpam(data.isUserSpam) - .setIsUserNSFW(data.isUserNSFW) - .setIsUserBot(data.isUserBot) - .setHasImage(data.hasImageUrl) - .setHasVideo(data.hasVideoUrl) - .setHasNews(data.hasNewsUrl) - .setHasCard(data.hasCard) - .setHasVisibleLink(data.hasVisibleLink) - .setParusScore(data.parusScore) - .setTextScore(data.textScore) - .setUserRep(data.userRep) - .setTokenAt140DividedByNumTokensBucket(data.tokenAt140DividedByNumTokensBucket); - - if (!metadata.isSetExtraMetadata()) { - metadata.setExtraMetadata(new ThriftSearchResultExtraMetadata()); - } - ThriftSearchResultExtraMetadata extraMetadata = metadata.getExtraMetadata(); - - // Promotion/Demotion features - extraMetadata.setUserLangScore(data.userLangMult) - .setHasDifferentLang(data.hasDifferentLang) - .setHasEnglishTweetAndDifferentUILang(data.hasEnglishTweetAndDifferentUILang) - .setHasEnglishUIAndDifferentTweetLang(data.hasEnglishUIAndDifferentTweetLang) - .setHasQuote(data.hasQuote) - .setQuotedCount((int) data.quotedCount) - .setWeightedRetweetCount((int) data.weightedRetweetCount) - .setWeightedReplyCount((int) data.weightedReplyCount) - .setWeightedFavCount((int) data.weightedFavCount) - .setWeightedQuoteCount((int) data.weightedQuoteCount) - .setQuerySpecificScore(data.querySpecificScore) - .setAuthorSpecificScore(data.authorSpecificScore) - .setRetweetCountV2((int) data.retweetCountV2) - .setFavCountV2((int) data.favCountV2) - .setReplyCountV2((int) data.replyCountV2) - .setIsComposerSourceCamera(data.isComposerSourceCamera) - .setFromBlueVerifiedAccount(data.isFromBlueVerifiedAccount); - - // Health model scores features - extraMetadata - .setToxicityScore(data.toxicityScore) - .setPBlockScore(data.pBlockScore) - .setPSpammyTweetScore(data.pSpammyTweetScore) - .setPReportedTweetScore(data.pReportedTweetScore) - .setSpammyTweetContentScore(data.spammyTweetContentScore) - .setExperimentalHealthModelScore1(data.experimentalHealthModelScore1) - .setExperimentalHealthModelScore2(data.experimentalHealthModelScore2) - .setExperimentalHealthModelScore3(data.experimentalHealthModelScore3) - .setExperimentalHealthModelScore4(data.experimentalHealthModelScore4); - - // Return all extra features for clients to consume. - if (options.isGetAllFeatures()) { - extraMetadata.setIsSensitiveContent(data.isSensitiveContent) - .setHasMultipleMediaFlag(data.hasMultipleMediaFlag) - .setProfileIsEggFlag(data.profileIsEggFlag) - .setIsUserNewFlag(data.isUserNewFlag) - .setNumMentions(data.numMentions) - .setNumHashtags(data.numHashtags) - .setLinkLanguage(data.linkLanguage) - .setPrevUserTweetEngagement(data.prevUserTweetEngagement); - } - - // Set features in new Feature Access API format, in the future this will be the only part - // needed in this method, we don't need to set any other metadata fields any more. - if (options.isReturnSearchResultFeatures()) { - // If the features are unset, and they were requested, then we can retrieve them. If they are - // already set, then we don't need to re-read the document features, and the reader - // is probably positioned over the wrong document so it will return incorrect results. - if (!extraMetadata.isSetFeatures()) { - // We ignore all features with default values when returning them in the response, - // because it saves a lot of network bandwidth. - ThriftSearchResultFeatures features = createFeaturesForDocument(data, true).getFeatures(); - extraMetadata.setFeatures(features); - } - - // The raw score may have changed since we created the features, so we should update it. - extraMetadata.getFeatures().getDoubleValues() - .put(ExternalTweetFeature.RAW_EARLYBIRD_SCORE.getId(), data.scoreFinal); - } - - metadata - .setIsSelfTweet(data.isSelfTweet) - .setIsUserAntiSocial(data.isUserAntiSocial); - } - - /** - * Create earlybird basic features and dervied features for current document. - * @return a FeatureHandler object where you can keep adding extra feature values, or you can - * call .getFeatures() on it to get a Thrift object to return. - */ - protected FeatureHandler createFeaturesForDocument( - LinearScoringData data, boolean ignoreDefaultValues) throws IOException { - ThriftSearchResultFeatures features = documentFeatures.getSearchResultFeatures(getSchema()); - if (!ignoreDefaultValues) { - setDefaultFeatureValues(features); - } - - // add derived features - return new FeatureHandler(features, ignoreDefaultValues) - .addDouble(ExternalTweetFeature.LUCENE_SCORE, data.luceneScore) - .addInt(ExternalTweetFeature.TWEET_AGE_IN_SECS, data.tweetAgeInSeconds) - .addBoolean(ExternalTweetFeature.IS_SELF_TWEET, data.isSelfTweet) - .addBoolean(ExternalTweetFeature.IS_FOLLOW_RETWEET, data.isFollow && data.isRetweet) - .addBoolean(ExternalTweetFeature.IS_TRUSTED_RETWEET, data.isTrusted && data.isRetweet) - .addBoolean(ExternalTweetFeature.AUTHOR_IS_FOLLOW, data.isFollow) - .addBoolean(ExternalTweetFeature.AUTHOR_IS_TRUSTED, data.isTrusted) - .addBoolean(ExternalTweetFeature.AUTHOR_IS_ANTISOCIAL, data.isUserAntiSocial) - .addBoolean(ExternalTweetFeature.HAS_DIFF_LANG, data.hasDifferentLang) - .addBoolean(ExternalTweetFeature.HAS_ENGLISH_TWEET_DIFF_UI_LANG, - data.hasEnglishTweetAndDifferentUILang) - .addBoolean(ExternalTweetFeature.HAS_ENGLISH_UI_DIFF_TWEET_LANG, - data.hasEnglishUIAndDifferentTweetLang) - .addDouble(ExternalTweetFeature.SEARCHER_LANG_SCORE, data.userLangMult) - .addDouble(ExternalTweetFeature.QUERY_SPECIFIC_SCORE, data.querySpecificScore) - .addDouble(ExternalTweetFeature.AUTHOR_SPECIFIC_SCORE, data.authorSpecificScore); - } - - /** - * Adds default values for most numeric features that do not have a value set yet in the given - * ThriftSearchResultFeatures instance. - * - * This method is needed because some models do not work properly with missing features. Instead, - * they expect all features to be present even if they are unset (their values are 0). - */ - protected void setDefaultFeatureValues(ThriftSearchResultFeatures features) { - for (Map.Entry entry - : getSchema().getSearchFeatureSchema().getEntries().entrySet()) { - int featureId = entry.getKey(); - ThriftSearchFeatureSchemaEntry schemaEntry = entry.getValue(); - if (shouldSetDefaultValueForFeature(schemaEntry.getFeatureType(), featureId)) { - switch (schemaEntry.getFeatureType()) { - case INT32_VALUE: - features.getIntValues().putIfAbsent(featureId, 0); - break; - case LONG_VALUE: - features.getLongValues().putIfAbsent(featureId, 0L); - break; - case DOUBLE_VALUE: - features.getDoubleValues().putIfAbsent(featureId, 0.0); - break; - default: - throw new IllegalArgumentException( - "Should set default values only for integer, long or double features. Instead, " - + "found feature " + featureId + " of type " + schemaEntry.getFeatureType()); - } - } - } - } - - protected void overrideFeatureValues(ThriftSearchResultFeatures features, - ThriftSearchResultFeatures overrideFeatures) { - LOG.info("Features before override {}", features); - if (overrideFeatures.isSetIntValues()) { - overrideFeatures.getIntValues().forEach(features::putToIntValues); - } - if (overrideFeatures.isSetLongValues()) { - overrideFeatures.getLongValues().forEach(features::putToLongValues); - } - if (overrideFeatures.isSetDoubleValues()) { - overrideFeatures.getDoubleValues().forEach(features::putToDoubleValues); - } - if (overrideFeatures.isSetBoolValues()) { - overrideFeatures.getBoolValues().forEach(features::putToBoolValues); - } - if (overrideFeatures.isSetStringValues()) { - overrideFeatures.getStringValues().forEach(features::putToStringValues); - } - if (overrideFeatures.isSetBytesValues()) { - overrideFeatures.getBytesValues().forEach(features::putToBytesValues); - } - if (overrideFeatures.isSetFeatureStoreDiscreteValues()) { - overrideFeatures.getFeatureStoreDiscreteValues().forEach( - features::putToFeatureStoreDiscreteValues); - } - if (overrideFeatures.isSetSparseBinaryValues()) { - overrideFeatures.getSparseBinaryValues().forEach(features::putToSparseBinaryValues); - } - if (overrideFeatures.isSetSparseContinuousValues()) { - overrideFeatures.getSparseContinuousValues().forEach(features::putToSparseContinuousValues); - } - if (overrideFeatures.isSetGeneralTensorValues()) { - overrideFeatures.getGeneralTensorValues().forEach(features::putToGeneralTensorValues); - } - if (overrideFeatures.isSetStringTensorValues()) { - overrideFeatures.getStringTensorValues().forEach(features::putToStringTensorValues); - } - LOG.info("Features after override {}", features); - } - - /** - * Check if a feature is eligible to have its default value automatically set when absent. - * We have a similar logic for building data record. - */ - private static boolean shouldSetDefaultValueForFeature( - ThriftSearchFeatureType type, int featureId) { - return ALLOWED_TYPES_FOR_DEFAULT_FEATURE_VALUES.contains(type) - && !NUMERIC_FEATURES_FOR_WHICH_DEFAULTS_SHOULD_NOT_BE_SET.contains(featureId) - && (ExternalTweetFeature.EARLYBIRD_INDEXED_FEATURE_IDS.contains(featureId) - || ExternalTweetFeature.EARLYBIRD_DERIVED_FEATURE_IDS.contains(featureId)); - } - - @Override - public void updateRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats) { - if (relevanceStats == null) { - return; - } - - LinearScoringData data = getScoringDataForCurrentDocument(); - - if (data.tweetAgeInSeconds > relevanceStats.getOldestScoredTweetAgeInSeconds()) { - relevanceStats.setOldestScoredTweetAgeInSeconds(data.tweetAgeInSeconds); - } - relevanceStats.setNumScored(relevanceStats.getNumScored() + 1); - if (data.scoreReturned == SKIP_HIT) { - relevanceStats.setNumSkipped(relevanceStats.getNumSkipped() + 1); - switch(data.skipReason) { - case ANTIGAMING: - relevanceStats.setNumSkippedForAntiGaming( - relevanceStats.getNumSkippedForAntiGaming() + 1); - break; - case LOW_REPUTATION: - relevanceStats.setNumSkippedForLowReputation( - relevanceStats.getNumSkippedForLowReputation() + 1); - break; - case LOW_TEXT_SCORE: - relevanceStats.setNumSkippedForLowTextScore( - relevanceStats.getNumSkippedForLowTextScore() + 1); - break; - case SOCIAL_FILTER: - relevanceStats.setNumSkippedForSocialFilter( - relevanceStats.getNumSkippedForSocialFilter() + 1); - break; - case LOW_FINAL_SCORE: - relevanceStats.setNumSkippedForLowFinalScore( - relevanceStats.getNumSkippedForLowFinalScore() + 1); - break; - case LOW_RETWEET_COUNT: - break; - default: - LOG.warn("Unknown SkipReason: " + data.skipReason); - } - } - - if (data.isFollow) { - relevanceStats.setNumFromDirectFollows(relevanceStats.getNumFromDirectFollows() + 1); - } - if (data.isTrusted) { - relevanceStats.setNumFromTrustedCircle(relevanceStats.getNumFromTrustedCircle() + 1); - } - if (data.isReply) { - relevanceStats.setNumReplies(relevanceStats.getNumReplies() + 1); - if (data.isTrusted) { - relevanceStats.setNumRepliesTrusted(relevanceStats.getNumRepliesTrusted() + 1); - } else if (!data.isFollow) { - relevanceStats.setNumRepliesOutOfNetwork(relevanceStats.getNumRepliesOutOfNetwork() + 1); - } - } - if (data.isSelfTweet) { - relevanceStats.setNumSelfTweets(relevanceStats.getNumSelfTweets() + 1); - } - if (data.hasImageUrl || data.hasVideoUrl) { - relevanceStats.setNumWithMedia(relevanceStats.getNumWithMedia() + 1); - } - if (data.hasNewsUrl) { - relevanceStats.setNumWithNews(relevanceStats.getNumWithNews() + 1); - } - if (data.isUserSpam) { - relevanceStats.setNumSpamUser(relevanceStats.getNumSpamUser() + 1); - } - if (data.isUserNSFW) { - relevanceStats.setNumOffensive(relevanceStats.getNumOffensive() + 1); - } - if (data.isUserBot) { - relevanceStats.setNumBot(relevanceStats.getNumBot() + 1); - } - } - - @VisibleForTesting - static final class ExplanationWrapper { - private Explanation explanation; - - public Explanation getExplanation() { - return explanation; - } - - @Override - public String toString() { - return explanation.toString(); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/LegacyScoreAccumulator.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/LegacyScoreAccumulator.docx new file mode 100644 index 000000000..79a6bfc0b Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/LegacyScoreAccumulator.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/LegacyScoreAccumulator.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/LegacyScoreAccumulator.java deleted file mode 100644 index bbe79cf84..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/LegacyScoreAccumulator.java +++ /dev/null @@ -1,98 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import com.twitter.search.common.util.ml.prediction_engine.BaseLegacyScoreAccumulator; -import com.twitter.search.common.util.ml.prediction_engine.LightweightLinearModel; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.modeling.tweet_ranking.TweetScoringFeatures; - -/** - * Legacy score accumulator in Earlybird with specific features added. - * This class is created to avoid adding LinearScoringData as a dependency to search's common ML - * library. - * - * @deprecated This class is retired and we suggest to switch to SchemaBasedScoreAccumulator. - */ -@Deprecated -public class LegacyScoreAccumulator extends BaseLegacyScoreAccumulator { - /** - * Constructs with a model and LinearScoringData - */ - LegacyScoreAccumulator(LightweightLinearModel model) { - super(model); - } - - /** - * Update the accumulator score with features, after this function the score should already - * be computed. - * - * @deprecated This function is retired and we suggest to switch to updateScoresWithFeatures in - * SchemaBasedScoreAccumulator. - */ - @Override - @Deprecated - protected void updateScoreWithFeatures(LinearScoringData data) { - addContinuousFeature(TweetScoringFeatures.LUCENE_SCORE, data.luceneScore); - addContinuousFeature(TweetScoringFeatures.TEXT_SCORE, data.textScore); - addContinuousFeature(TweetScoringFeatures.TWEET_AGE_IN_SECONDS, data.tweetAgeInSeconds); - addContinuousFeature(TweetScoringFeatures.REPLY_COUNT, data.replyCountPostLog2); - addContinuousFeature(TweetScoringFeatures.RETWEET_COUNT, data.retweetCountPostLog2); - addContinuousFeature(TweetScoringFeatures.FAV_COUNT, data.favCountPostLog2); - addContinuousFeature(TweetScoringFeatures.REPLY_COUNT_V2, data.replyCountV2); - addContinuousFeature(TweetScoringFeatures.RETWEET_COUNT_V2, data.retweetCountV2); - addContinuousFeature(TweetScoringFeatures.FAV_COUNT_V2, data.favCountV2); - addContinuousFeature(TweetScoringFeatures.EMBEDS_IMPRESSION_COUNT, - data.getEmbedsImpressionCount(false)); - addContinuousFeature(TweetScoringFeatures.EMBEDS_URL_COUNT, data.getEmbedsUrlCount(false)); - addContinuousFeature(TweetScoringFeatures.VIDEO_VIEW_COUNT, data.getVideoViewCount(false)); - addContinuousFeature(TweetScoringFeatures.QUOTED_COUNT, data.quotedCount); - addContinuousFeature(TweetScoringFeatures.WEIGHTED_RETWEET_COUNT, data.weightedRetweetCount); - addContinuousFeature(TweetScoringFeatures.WEIGHTED_REPLY_COUNT, data.weightedReplyCount); - addContinuousFeature(TweetScoringFeatures.WEIGHTED_FAV_COUNT, data.weightedFavCount); - addContinuousFeature(TweetScoringFeatures.WEIGHTED_QUOTE_COUNT, data.weightedQuoteCount); - addBinaryFeature(TweetScoringFeatures.HAS_URL, data.hasUrl); - addBinaryFeature(TweetScoringFeatures.HAS_CARD, data.hasCard); - addBinaryFeature(TweetScoringFeatures.HAS_VINE, data.hasVine); - addBinaryFeature(TweetScoringFeatures.HAS_PERISCOPE, data.hasPeriscope); - addBinaryFeature(TweetScoringFeatures.HAS_NATIVE_IMAGE, data.hasNativeImage); - addBinaryFeature(TweetScoringFeatures.HAS_IMAGE_URL, data.hasImageUrl); - addBinaryFeature(TweetScoringFeatures.HAS_NEWS_URL, data.hasNewsUrl); - addBinaryFeature(TweetScoringFeatures.HAS_VIDEO_URL, data.hasVideoUrl); - addBinaryFeature(TweetScoringFeatures.HAS_CONSUMER_VIDEO, data.hasConsumerVideo); - addBinaryFeature(TweetScoringFeatures.HAS_PRO_VIDEO, data.hasProVideo); - addBinaryFeature(TweetScoringFeatures.HAS_QUOTE, data.hasQuote); - addBinaryFeature(TweetScoringFeatures.HAS_TREND, data.hasTrend); - addBinaryFeature(TweetScoringFeatures.HAS_MULTIPLE_HASHTAGS_OR_TRENDS, - data.hasMultipleHashtagsOrTrends); - addBinaryFeature(TweetScoringFeatures.IS_OFFENSIVE, data.isOffensive); - addBinaryFeature(TweetScoringFeatures.IS_REPLY, data.isReply); - addBinaryFeature(TweetScoringFeatures.IS_RETWEET, data.isRetweet); - addBinaryFeature(TweetScoringFeatures.IS_SELF_TWEET, data.isSelfTweet); - addBinaryFeature(TweetScoringFeatures.IS_FOLLOW_RETWEET, data.isRetweet & data.isFollow); - addBinaryFeature(TweetScoringFeatures.IS_TRUSTED_RETWEET, data.isRetweet & data.isTrusted); - addContinuousFeature(TweetScoringFeatures.QUERY_SPECIFIC_SCORE, data.querySpecificScore); - addContinuousFeature(TweetScoringFeatures.AUTHOR_SPECIFIC_SCORE, data.authorSpecificScore); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_FOLLOW, data.isFollow); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_TRUSTED, data.isTrusted); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_VERIFIED, data.isFromVerifiedAccount); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_NSFW, data.isUserNSFW); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_SPAM, data.isUserSpam); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_BOT, data.isUserBot); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_ANTISOCIAL, data.isUserAntiSocial); - addContinuousFeature(TweetScoringFeatures.AUTHOR_REPUTATION, data.userRep); - addContinuousFeature(TweetScoringFeatures.SEARCHER_LANG_SCORE, data.userLangMult); - addBinaryFeature(TweetScoringFeatures.HAS_DIFFERENT_LANG, data.hasDifferentLang); - addBinaryFeature(TweetScoringFeatures.HAS_ENGLISH_TWEET_AND_DIFFERENT_UI_LANG, - data.hasEnglishTweetAndDifferentUILang); - addBinaryFeature(TweetScoringFeatures.HAS_ENGLISH_UI_AND_DIFFERENT_TWEET_LANG, - data.hasEnglishUIAndDifferentTweetLang); - addBinaryFeature(TweetScoringFeatures.IS_SENSITIVE_CONTENT, data.isSensitiveContent); - addBinaryFeature(TweetScoringFeatures.HAS_MULTIPLE_MEDIA, data.hasMultipleMediaFlag); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_PROFILE_EGG, data.profileIsEggFlag); - addBinaryFeature(TweetScoringFeatures.AUTHOR_IS_NEW, data.isUserNewFlag); - addContinuousFeature(TweetScoringFeatures.MENTIONS_COUNT, data.numMentions); - addContinuousFeature(TweetScoringFeatures.HASHTAGS_COUNT, data.numHashtags); - addContinuousFeature(TweetScoringFeatures.LINK_LANGUAGE_ID, data.linkLanguage); - addContinuousFeature(TweetScoringFeatures.LANGUAGE_ID, data.tweetLangId); - addBinaryFeature(TweetScoringFeatures.HAS_VISIBLE_LINK, data.hasVisibleLink); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/LinearScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/LinearScoringFunction.docx new file mode 100644 index 000000000..07d0f5823 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/LinearScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/LinearScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/LinearScoringFunction.java deleted file mode 100644 index 770f4f49b..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/LinearScoringFunction.java +++ /dev/null @@ -1,237 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; -import java.util.List; - -import com.google.common.collect.Lists; - -import org.apache.lucene.search.Explanation; - -import com.twitter.search.common.relevance.features.MutableFeatureNormalizers; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.search.AntiGamingFilter; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.search.relevance.LinearScoringParams; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; - -/** - * Scoring function that uses the weights and boosts provided in the scoring parameters from the - * request. - */ -public class LinearScoringFunction extends FeatureBasedScoringFunction { - private static final double BASE_SCORE = 0.0001; - - public LinearScoringFunction( - ImmutableSchemaInterface schema, - ThriftSearchQuery searchQuery, - AntiGamingFilter antiGamingFilter, - ThriftSearchResultType searchResultType, - UserTable userTable) throws IOException { - super("LinearScoringFunction", schema, searchQuery, antiGamingFilter, searchResultType, - userTable); - } - - @Override - protected double computeScore(LinearScoringData data, boolean forExplanation) throws IOException { - double score = BASE_SCORE; - - data.luceneContrib = params.useLuceneScoreAsBoost - ? 0.0 : params.luceneWeight * data.luceneScore; - - data.reputationContrib = params.reputationWeight * data.userRep; - data.textScoreContrib = params.textScoreWeight * data.textScore; - data.parusContrib = params.parusWeight * data.parusScore; - - // contributions from engagement counters. Note that we have "true" argument for all getters, - // which means all values will get scaled down for scoring, they were unbounded in raw form. - data.retweetContrib = params.retweetWeight * data.retweetCountPostLog2; - data.favContrib = params.favWeight * data.favCountPostLog2; - data.replyContrib = params.replyWeight * data.replyCountPostLog2; - data.embedsImpressionContrib = - params.embedsImpressionWeight * data.getEmbedsImpressionCount(true); - data.embedsUrlContrib = - params.embedsUrlWeight * data.getEmbedsUrlCount(true); - data.videoViewContrib = - params.videoViewWeight * data.getVideoViewCount(true); - data.quotedContrib = - params.quotedCountWeight * data.quotedCount; - - for (int i = 0; i < LinearScoringData.MAX_OFFLINE_EXPERIMENTAL_FIELDS; i++) { - data.offlineExpFeatureContributions[i] = - params.rankingOfflineExpWeights[i] * data.offlineExpFeatureValues[i]; - } - - data.hasUrlContrib = params.urlWeight * (data.hasUrl ? 1.0 : 0.0); - data.isReplyContrib = params.isReplyWeight * (data.isReply ? 1.0 : 0.0); - data.isFollowRetweetContrib = - params.followRetweetWeight * (data.isRetweet && data.isFollow ? 1.0 : 0.0); - data.isTrustedRetweetContrib = - params.trustedRetweetWeight * (data.isRetweet && data.isTrusted ? 1.0 : 0.0); - double replyCountOriginal = getUnscaledReplyCountFeatureValue(); - data.multipleReplyContrib = params.multipleReplyWeight - * (replyCountOriginal < params.multipleReplyMinVal ? 0.0 : replyCountOriginal); - - // We directly the query specific score as the contribution below as it doesn't need a weight - // for contribution computation. - score += data.luceneContrib - + data.reputationContrib - + data.textScoreContrib - + data.replyContrib - + data.multipleReplyContrib - + data.retweetContrib - + data.favContrib - + data.parusContrib - + data.embedsImpressionContrib - + data.embedsUrlContrib - + data.videoViewContrib - + data.quotedContrib - + data.hasUrlContrib - + data.isReplyContrib - + data.isFollowRetweetContrib - + data.isTrustedRetweetContrib - + data.querySpecificScore - + data.authorSpecificScore; - - for (int i = 0; i < LinearScoringData.MAX_OFFLINE_EXPERIMENTAL_FIELDS; i++) { - score += data.offlineExpFeatureContributions[i]; - } - - return score; - } - - /** - * Generates the explanation for the linear score. - */ - @Override - protected void generateExplanationForScoring( - LinearScoringData scoringData, boolean isHit, List details) throws IOException { - // 1. Linear components - final List linearDetails = Lists.newArrayList(); - addLinearElementExplanation( - linearDetails, "[LuceneQueryScore]", - params.luceneWeight, scoringData.luceneScore, scoringData.luceneContrib); - if (scoringData.hasCard) { - if (scoringData.cardAuthorMatchBoostApplied) { - linearDetails.add(Explanation.match( - (float) params.cardAuthorMatchBoosts[scoringData.cardType], - "[x] card author match boost")); - } - if (scoringData.cardDescriptionMatchBoostApplied) { - linearDetails.add(Explanation.match( - (float) params.cardDescriptionMatchBoosts[scoringData.cardType], - "[x] card description match boost")); - } - if (scoringData.cardDomainMatchBoostApplied) { - linearDetails.add(Explanation.match( - (float) params.cardDomainMatchBoosts[scoringData.cardType], - "[x] card domain match boost")); - } - if (scoringData.cardTitleMatchBoostApplied) { - linearDetails.add(Explanation.match( - (float) params.cardTitleMatchBoosts[scoringData.cardType], - "[x] card title match boost")); - } - } - addLinearElementExplanation( - linearDetails, "reputation", - params.reputationWeight, scoringData.userRep, scoringData.reputationContrib); - addLinearElementExplanation( - linearDetails, "text score", - params.textScoreWeight, scoringData.textScore, scoringData.textScoreContrib); - addLinearElementExplanation( - linearDetails, "reply count (log2)", - params.replyWeight, scoringData.replyCountPostLog2, scoringData.replyContrib); - addLinearElementExplanation( - linearDetails, "multi reply", - params.multipleReplyWeight, - getUnscaledReplyCountFeatureValue() > params.multipleReplyMinVal ? 1 : 0, - scoringData.multipleReplyContrib); - addLinearElementExplanation( - linearDetails, "retweet count (log2)", - params.retweetWeight, scoringData.retweetCountPostLog2, scoringData.retweetContrib); - addLinearElementExplanation( - linearDetails, "fav count (log2)", - params.favWeight, scoringData.favCountPostLog2, scoringData.favContrib); - addLinearElementExplanation( - linearDetails, "parus score", - params.parusWeight, scoringData.parusScore, scoringData.parusContrib); - for (int i = 0; i < LinearScoringData.MAX_OFFLINE_EXPERIMENTAL_FIELDS; i++) { - if (params.rankingOfflineExpWeights[i] != LinearScoringParams.DEFAULT_FEATURE_WEIGHT) { - addLinearElementExplanation(linearDetails, - "ranking exp score offline experimental #" + i, - params.rankingOfflineExpWeights[i], scoringData.offlineExpFeatureValues[i], - scoringData.offlineExpFeatureContributions[i]); - } - } - addLinearElementExplanation(linearDetails, - "embedded tweet impression count", - params.embedsImpressionWeight, scoringData.getEmbedsImpressionCount(false), - scoringData.embedsImpressionContrib); - addLinearElementExplanation(linearDetails, - "embedded tweet url count", - params.embedsUrlWeight, scoringData.getEmbedsUrlCount(false), - scoringData.embedsUrlContrib); - addLinearElementExplanation(linearDetails, - "video view count", - params.videoViewWeight, scoringData.getVideoViewCount(false), - scoringData.videoViewContrib); - addLinearElementExplanation(linearDetails, - "quoted count", - params.quotedCountWeight, scoringData.quotedCount, scoringData.quotedContrib); - - addLinearElementExplanation( - linearDetails, "has url", params.urlWeight, scoringData.hasUrl ? 1.0 : 0.0, - scoringData.hasUrlContrib); - - addLinearElementExplanation( - linearDetails, "is reply", params.isReplyWeight, - scoringData.isReply ? 1.0 : 0.0, scoringData.isReplyContrib); - addLinearElementExplanation( - linearDetails, "is follow retweet", params.followRetweetWeight, - scoringData.isRetweet && scoringData.isFollow ? 1.0 : 0.0, - scoringData.isFollowRetweetContrib); - addLinearElementExplanation( - linearDetails, "is trusted retweet", params.trustedRetweetWeight, - scoringData.isRetweet && scoringData.isTrusted ? 1.0 : 0.0, - scoringData.isTrustedRetweetContrib); - - if (scoringData.querySpecificScore != 0.0) { - linearDetails.add(Explanation.match((float) scoringData.querySpecificScore, - "[+] query specific score adjustment")); - } - if (scoringData.authorSpecificScore != 0.0) { - linearDetails.add(Explanation.match((float) scoringData.authorSpecificScore, - "[+] author specific score adjustment")); - } - - - Explanation linearCombo = isHit - ? Explanation.match((float) scoringData.scoreBeforeBoost, - "(MATCH) Linear components, sum of:", linearDetails) - : Explanation.noMatch("Linear components, sum of:", linearDetails); - - - details.add(linearCombo); - } - - private void addLinearElementExplanation(List explanation, - String name, - double weight, - double componentValue, - double contrib) { - if (contrib == 0.0) { - return; - } - explanation.add( - Explanation.match((float) contrib, - String.format("[+] %s=%.3f weight=%.3f", name, componentValue, weight))); - } - - private double getUnscaledReplyCountFeatureValue() throws IOException { - byte featureValue = (byte) documentFeatures.getFeatureValue(EarlybirdFieldConstant.REPLY_COUNT); - return MutableFeatureNormalizers.BYTE_NORMALIZER.unnormLowerBound(featureValue); - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ModelBasedScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ModelBasedScoringFunction.docx new file mode 100644 index 000000000..ea7697494 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ModelBasedScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ModelBasedScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ModelBasedScoringFunction.java deleted file mode 100644 index 179f684cd..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ModelBasedScoringFunction.java +++ /dev/null @@ -1,151 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -import org.apache.lucene.search.Explanation; - -import com.twitter.search.common.features.thrift.ThriftSearchResultFeatures; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.ranking.thriftjava.ThriftRankingParams; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.util.ml.prediction_engine.LightweightLinearModel; -import com.twitter.search.common.util.ml.prediction_engine.SchemaBasedScoreAccumulator; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.exception.ClientException; -import com.twitter.search.earlybird.ml.ScoringModelsManager; -import com.twitter.search.earlybird.search.AntiGamingFilter; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; - -/** - * Scoring function that uses the scoring models specified from the request. - */ -public class ModelBasedScoringFunction extends FeatureBasedScoringFunction { - private final SelectedModel[] selectedModels; - private final boolean useLogitScore; - private final boolean isSchemaBased; - - private static final SearchCounter NUM_LEGACY_MODELS = - SearchCounter.export("scoring_function_num_legacy_models"); - private static final SearchCounter NUM_SCHEMA_BASED_MODELS = - SearchCounter.export("scoring_function_num_schema_based_models"); - private static final SearchCounter MIXED_MODEL_TYPES = - SearchCounter.export("scoring_function_mixed_model_types"); - - public ModelBasedScoringFunction( - ImmutableSchemaInterface schema, - ThriftSearchQuery searchQuery, - AntiGamingFilter antiGamingFilter, - ThriftSearchResultType searchResultType, - UserTable userTable, - ScoringModelsManager scoringModelsManager - ) throws IOException, ClientException { - - super("ModelBasedScoringFunction", schema, searchQuery, antiGamingFilter, searchResultType, - userTable); - - ThriftRankingParams rankingParams = searchQuery.getRelevanceOptions().getRankingParams(); - Preconditions.checkNotNull(rankingParams); - - if (rankingParams.getSelectedModelsSize() <= 0) { - throw new ClientException("Scoring type is MODEL_BASED but no models were selected"); - } - - Map models = rankingParams.getSelectedModels(); - - selectedModels = new SelectedModel[models.size()]; - int numSchemaBased = 0; - int i = 0; - for (Map.Entry nameAndWeight : models.entrySet()) { - Optional model = - scoringModelsManager.getModel(nameAndWeight.getKey()); - if (!model.isPresent()) { - throw new ClientException(String.format( - "Scoring function is MODEL_BASED. Selected model '%s' not found", - nameAndWeight.getKey())); - } - selectedModels[i] = - new SelectedModel(nameAndWeight.getKey(), nameAndWeight.getValue(), model.get()); - - if (selectedModels[i].model.isSchemaBased()) { - ++numSchemaBased; - NUM_SCHEMA_BASED_MODELS.increment(); - } else { - NUM_LEGACY_MODELS.increment(); - } - ++i; - } - - // We should either see all models schema-based, or none of them so, if this is not the case, - // we log an error message and fall back to use just the first model, whatever it is. - if (numSchemaBased > 0 && numSchemaBased != selectedModels.length) { - MIXED_MODEL_TYPES.increment(); - throw new ClientException( - "You cannot mix schema-based and non-schema-based models in the same request, " - + "models are: " + models.keySet()); - } - - isSchemaBased = selectedModels[0].model.isSchemaBased(); - useLogitScore = rankingParams.isUseLogitScore(); - } - - @Override - protected double computeScore(LinearScoringData data, boolean forExplanation) throws IOException { - ThriftSearchResultFeatures features = - isSchemaBased ? createFeaturesForDocument(data, false).getFeatures() : null; - - double score = 0; - for (SelectedModel selectedModel : selectedModels) { - double modelScore = isSchemaBased - ? new SchemaBasedScoreAccumulator(selectedModel.model).scoreWith(features, useLogitScore) - : new LegacyScoreAccumulator(selectedModel.model).scoreWith(data, useLogitScore); - score += selectedModel.weight * modelScore; - } - - return score; - } - - @Override - protected void generateExplanationForScoring( - LinearScoringData scoringData, boolean isHit, List details) throws IOException { - boolean schemaBased = selectedModels[0].model.isSchemaBased(); - ThriftSearchResultFeatures features = - schemaBased ? createFeaturesForDocument(scoringData, false).getFeatures() : null; - - // 1. Model-based score - final List modelExplanations = Lists.newArrayList(); - float finalScore = 0; - for (SelectedModel selectedModel : selectedModels) { - double modelScore = schemaBased - ? new SchemaBasedScoreAccumulator(selectedModel.model).scoreWith(features, useLogitScore) - : new LegacyScoreAccumulator(selectedModel.model).scoreWith(scoringData, useLogitScore); - float weightedScore = (float) (selectedModel.weight * modelScore); - details.add(Explanation.match( - weightedScore, String.format("model=%s score=%.6f weight=%.3f useLogitScore=%s", - selectedModel.name, modelScore, selectedModel.weight, useLogitScore))); - finalScore += weightedScore; - } - - details.add(Explanation.match( - finalScore, String.format("Total model-based score (hit=%s)", isHit), modelExplanations)); - } - - private static final class SelectedModel { - public final String name; - public final double weight; - public final LightweightLinearModel model; - - private SelectedModel(String name, double weight, LightweightLinearModel model) { - this.name = name; - this.weight = weight; - this.model = model; - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/RelevanceQuery.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/RelevanceQuery.docx new file mode 100644 index 000000000..6ec15bd5b Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/RelevanceQuery.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/RelevanceQuery.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/RelevanceQuery.java deleted file mode 100644 index b105f3490..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/RelevanceQuery.java +++ /dev/null @@ -1,164 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; -import java.util.Objects; -import java.util.Set; - -import javax.annotation.Nullable; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.results.thriftjava.FieldHitAttribution; - -/** - * A wrapper for a Lucene query which first computes Lucene's query score - * and then delegates to a {@link ScoringFunction} for final score computation. - */ -public class RelevanceQuery extends Query { - private static final Logger LOG = LoggerFactory.getLogger(RelevanceQuery.class.getName()); - - protected final Query luceneQuery; - protected final ScoringFunction scoringFunction; - - // True when the lucene query's score should be ignored for debug explanations. - protected final boolean ignoreLuceneQueryScoreExplanation; - - public RelevanceQuery(Query luceneQuery, ScoringFunction scoringFunction) { - this(luceneQuery, scoringFunction, false); - } - - public RelevanceQuery(Query luceneQuery, - ScoringFunction scoringFunction, - boolean ignoreLuceneQueryScoreExplanation) { - this.luceneQuery = luceneQuery; - this.scoringFunction = scoringFunction; - this.ignoreLuceneQueryScoreExplanation = ignoreLuceneQueryScoreExplanation; - } - - public ScoringFunction getScoringFunction() { - return scoringFunction; - } - - public Query getLuceneQuery() { - return luceneQuery; - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - Query rewritten = luceneQuery.rewrite(reader); - if (rewritten == luceneQuery) { - return this; - } - return new RelevanceQuery(rewritten, scoringFunction, ignoreLuceneQueryScoreExplanation); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - Weight luceneWeight = luceneQuery.createWeight(searcher, scoreMode, boost); - if (luceneWeight == null) { - return null; - } - return new RelevanceWeight(searcher, luceneWeight); - } - - public class RelevanceWeight extends Weight { - private final Weight luceneWeight; - - public RelevanceWeight(IndexSearcher searcher, Weight luceneWeight) { - super(RelevanceQuery.this); - this.luceneWeight = luceneWeight; - } - - @Override - public void extractTerms(Set terms) { - this.luceneWeight.extractTerms(terms); - } - - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return explain(context, doc, null); - } - - /** - * Returns an explanation of the scoring for the given document. - * - * @param context The context of the reader that returned this document. - * @param doc The document. - * @param fieldHitAttribution Per-hit field attribution information. - * @return An explanation of the scoring for the given document. - */ - public Explanation explain(LeafReaderContext context, int doc, - @Nullable FieldHitAttribution fieldHitAttribution) throws IOException { - - Explanation luceneExplanation = Explanation.noMatch("LuceneQuery explain skipped"); - if (!ignoreLuceneQueryScoreExplanation) { - // get Lucene score - try { - luceneExplanation = luceneWeight.explain(context, doc); - } catch (Exception e) { - // We sometimes see exceptions resulting from term queries that do not store - // utf8-text, which TermQuery.toString() assumes. Catch here and allow at least - // scoring function explanations to be returned. - LOG.error("Exception in explain", e); - luceneExplanation = Explanation.noMatch("LuceneQuery explain failed"); - } - } - - Explanation scoringFunctionExplanation; - scoringFunction.setFieldHitAttribution(fieldHitAttribution); - scoringFunctionExplanation = scoringFunction.explain( - context.reader(), doc, luceneExplanation.getValue().floatValue()); - - // just add a wrapper for a better structure of the final explanation - Explanation luceneExplanationWrapper = Explanation.match( - luceneExplanation.getValue(), "LuceneQuery", luceneExplanation); - - return Explanation.match(scoringFunctionExplanation.getValue(), "RelevanceQuery", - scoringFunctionExplanation, luceneExplanationWrapper); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - return luceneWeight.scorer(context); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - return luceneWeight.isCacheable(ctx); - } - } - - @Override - public int hashCode() { - return (luceneQuery == null ? 0 : luceneQuery.hashCode()) - + (scoringFunction == null ? 0 : scoringFunction.hashCode()) * 13; - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof RelevanceQuery)) { - return false; - } - - RelevanceQuery query = RelevanceQuery.class.cast(obj); - return Objects.equals(luceneQuery, query.luceneQuery) - && Objects.equals(scoringFunction, query.scoringFunction); - } - - @Override - public String toString(String field) { - return "RelevanceQuery[q=" + luceneQuery.toString(field) + "]"; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/RetweetBasedTopTweetsScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/RetweetBasedTopTweetsScoringFunction.docx new file mode 100644 index 000000000..8df83fb06 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/RetweetBasedTopTweetsScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/RetweetBasedTopTweetsScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/RetweetBasedTopTweetsScoringFunction.java deleted file mode 100644 index deae8ba66..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/RetweetBasedTopTweetsScoringFunction.java +++ /dev/null @@ -1,165 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; - -import org.apache.lucene.search.Explanation; - -import com.twitter.search.common.relevance.features.MutableFeatureNormalizers; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -/** - * A toptweets query cache index selection scoring function that is based purely on retweet counts. - * The goal of this scoring functon is to deprecate itweet score in entirety. - * - * Once all legacy itweet scores are drained from existing earlybird index, new parus score replaces - * existing itweet score position, then this class will be deprecated, a new scoring function - * using parus score shall replace this. - * - * this scoring function is only used in Query Cache for marking top tweets - * in the background. When searched, those tweets are still ranked with linear or model-based - * scoring function. - * - */ -public class RetweetBasedTopTweetsScoringFunction extends ScoringFunction { - private static final double DEFAULT_RECENCY_SCORE_FRACTION = 0.1; - private static final double DEFAULT_SIGMOID_APLHA = 0.008; - private static final int DEFAULT_RECENCY_CENTER_MINUTES = 1080; - - // if you update the default cut off, make sure you update the query cache filter in - // querycache.yml - // - // we know currently each time slice, each partition has about 10K entries in toptweets query - // cache. These are unique tweets. Looking at retweet updates, each time slice, each partition has - // about 650K unique tweets that received retweet. To create roughly similar number of entries in - // query cache, we need top 2% of such tweets, and that sets to min retweet count to 4. - // In this linear scoring function, we will rescale retweet count to [0, 1] range, - // with an input range of [0, 20]. Given the realtime factor's weight of 0.1, that give our - // minimal retweet score threshold to: 4/20 * 0.9 = 0.18. - // Testing on prod showed much higher volume due to the generous setting of max value of 20, - // (highest we have seen is 14). Adjusted to 0.21 which gave us similar volume. - private static final double DEFAULT_CUT_OFF_SCORE = 0.21; - - // Normalize retweet counts from [0, 20] range to [0, 1] range - private static final double MAX_RETWEET_COUNT = 20.0; - private static final double MIN_USER_REPUTATION = 40.0; // matches itweet system threshold - - /** - * The scores for the retweet based top tweets have to be in the [0, 1] interval. So we can't use - * SKIP_HIT as the lowest possible score, and instead have to use Float.MIN_VALUE. - * - * It's OK to use different values for these constants, because they do not interfere with each - * other. This constant is only used in RetweetBasedTopTweetsScoringFunction, which is only used - * to filter the hits for the [score_filter retweets minScore maxScore] operator. So the scores - * returned by RetweetBasedTopTweetsScoringFunction.score() do not have any impact on the final - * hit score. - * - * See EarlybirdLuceneQueryVisitor.visitScoredFilterOperator() and ScoreFilterQuery for more details. - */ - private static final float RETWEET_BASED_TOP_TWEETS_LOWEST_SCORE = Float.MIN_VALUE; - - private final double recencyScoreFraction; - private final double sigmoidAlpha; - private final double cutOffScore; - private final int recencyCenterMinutes; - private final double maxRecency; - - private final int currentTimeSeconds; - - private ThriftSearchResultMetadata metadata = null; - private double score; - private double retweetCount; - - public RetweetBasedTopTweetsScoringFunction(ImmutableSchemaInterface schema) { - this(schema, DEFAULT_RECENCY_SCORE_FRACTION, - DEFAULT_SIGMOID_APLHA, - DEFAULT_CUT_OFF_SCORE, - DEFAULT_RECENCY_CENTER_MINUTES); - } - - /** - * Creates a no decay scoring function (used by top archive). - * Otherwise same as default constructor. - * @param nodecay If no decay is set to true. Alpha is set to 0.0. - */ - public RetweetBasedTopTweetsScoringFunction(ImmutableSchemaInterface schema, boolean nodecay) { - this(schema, DEFAULT_RECENCY_SCORE_FRACTION, - nodecay ? 0.0 : DEFAULT_SIGMOID_APLHA, - DEFAULT_CUT_OFF_SCORE, - DEFAULT_RECENCY_CENTER_MINUTES); - } - - public RetweetBasedTopTweetsScoringFunction(ImmutableSchemaInterface schema, - double recencyScoreFraction, double sigmoidAlpha, - double cutOffScore, int recencyCenterMinutes) { - super(schema); - this.recencyScoreFraction = recencyScoreFraction; - this.sigmoidAlpha = sigmoidAlpha; - this.cutOffScore = cutOffScore; - this.recencyCenterMinutes = recencyCenterMinutes; - this.maxRecency = computeSigmoid(0); - this.currentTimeSeconds = (int) (System.currentTimeMillis() / 1000); - } - - @Override - protected float score(float luceneQueryScore) throws IOException { - // Reset the data for each tweet!!! - metadata = null; - if (documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_OFFENSIVE_FLAG) - || (documentFeatures.getFeatureValue(EarlybirdFieldConstant.USER_REPUTATION) - < MIN_USER_REPUTATION)) { - score = RETWEET_BASED_TOP_TWEETS_LOWEST_SCORE; - } else { - // Note that here we want the post log2 value, as the MAX_RETWEET_COUNT was actually - // set up for that. - retweetCount = MutableFeatureNormalizers.BYTE_NORMALIZER.unnormAndLog2( - (byte) documentFeatures.getFeatureValue(EarlybirdFieldConstant.RETWEET_COUNT)); - final double recencyScore = computeTopTweetRecencyScore(); - - score = (retweetCount / MAX_RETWEET_COUNT) * (1 - recencyScoreFraction) - + recencyScoreFraction * recencyScore; - - if (score < this.cutOffScore) { - score = RETWEET_BASED_TOP_TWEETS_LOWEST_SCORE; - } - } - - return (float) score; - } - - private double computeSigmoid(double x) { - return 1.0f / (1.0f + Math.exp(sigmoidAlpha * (x - recencyCenterMinutes))); - } - - private double computeTopTweetRecencyScore() { - double diffMinutes = - Math.max(0, currentTimeSeconds - timeMapper.getTime(getCurrentDocID())) / 60.0; - return computeSigmoid(diffMinutes) / maxRecency; - } - - @Override - protected Explanation doExplain(float luceneScore) { - return null; - } - - @Override - public ThriftSearchResultMetadata getResultMetadata(ThriftSearchResultMetadataOptions options) { - if (metadata == null) { - metadata = new ThriftSearchResultMetadata() - .setResultType(ThriftSearchResultType.POPULAR) - .setPenguinVersion(EarlybirdConfig.getPenguinVersionByte()); - metadata.setRetweetCount((int) retweetCount); - metadata.setScore(score); - } - return metadata; - } - - @Override - public void updateRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats) { - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunction.docx new file mode 100644 index 000000000..d0885b934 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunction.java deleted file mode 100644 index c2b1a4deb..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunction.java +++ /dev/null @@ -1,213 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; -import java.util.List; - -import com.google.common.base.Preconditions; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Explanation; - -import com.twitter.common.collections.Pair; -import com.twitter.search.common.constants.thriftjava.ThriftLanguage; -import com.twitter.search.common.features.thrift.ThriftSearchResultFeatures; -import com.twitter.search.common.query.HitAttributeHelper; -import com.twitter.search.common.relevance.features.EarlybirdDocumentFeatures; -import com.twitter.search.common.results.thriftjava.FieldHitAttribution; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.core.earlybird.index.DocIDToTweetIDMapper; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.TimeMapper; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; -import com.twitter.search.queryparser.query.Query; - -/** - * Defines a ranking function which computes the score of a document that matches a query. - */ -public abstract class ScoringFunction { - /** - * Returned by a {@link #score(int, float)} to indicate that a hit should be scored below all. - * - * We have some equality tests like: - * "if (score == ScoringFunction.SKIP_HIT) {...}" (DefaultScoringFunction#updateRelevanceStats) - * We might also have double to float casts. - * - * Such castings seem to work with the equality test, but there might corner cases when casting - * this float value to a double (and back) might not work properly. - * - * If possible, we should choose a constant that is not in the valid score range. Then we can - * turn the float equality tests into Math.abs(...) < EPSILON tests. - */ - public static final float SKIP_HIT = -Float.MAX_VALUE; - - private final ImmutableSchemaInterface schema; - - // The current doc ID and the reader for the current segment should be private, because we don't - // want sub-classes to incorrectly update them. The doc ID should only be updated by the score() - // and explain() methods, and the reader should only be updated by the setNextReader() method. - private int currentDocID = -1; - - protected DocIDToTweetIDMapper tweetIDMapper = null; - protected TimeMapper timeMapper = null; - protected EarlybirdDocumentFeatures documentFeatures; - - protected int debugMode = 0; - protected HitAttributeHelper hitAttributeHelper; - protected Query query; - - protected FieldHitAttribution fieldHitAttribution; - - public ScoringFunction(ImmutableSchemaInterface schema) { - this.schema = Preconditions.checkNotNull(schema); - } - - protected ImmutableSchemaInterface getSchema() { - return schema; - } - - /** - * Updates the reader that will be used to retrieve the tweet IDs and creation times associated - * with scored doc IDs, as well as the values for various CSFs. Should be called every time the - * searcher starts searching in a new segment. - */ - public void setNextReader(EarlybirdIndexSegmentAtomicReader reader) throws IOException { - tweetIDMapper = reader.getSegmentData().getDocIDToTweetIDMapper(); - timeMapper = reader.getSegmentData().getTimeMapper(); - documentFeatures = new EarlybirdDocumentFeatures(reader); - initializeNextSegment(reader); - } - - public void setHitAttributeHelperAndQuery(HitAttributeHelper newHitAttributeHelper, - Query parsedQuery) { - this.hitAttributeHelper = newHitAttributeHelper; - this.query = parsedQuery; - } - - public void setFieldHitAttribution(FieldHitAttribution fieldHitAttribution) { - this.fieldHitAttribution = fieldHitAttribution; - } - - public void setDebugMode(int debugMode) { - this.debugMode = debugMode; - } - - /** - * Allow scoring functions to perform more per-segment-specific setup. - */ - protected void initializeNextSegment(EarlybirdIndexSegmentAtomicReader reader) - throws IOException { - // Noop by default - } - - // Updates the current document ID and advances all NumericDocValues to this doc ID. - private void setCurrentDocID(int currentDocID) throws IOException { - this.currentDocID = currentDocID; - documentFeatures.advance(currentDocID); - } - - /** - * Returns the current doc ID stored in this scoring function. - */ - public int getCurrentDocID() { - return currentDocID; - } - - /** - * Compute the score for the current hit. This is not expected to be thread safe. - * - * @param internalDocID internal id of the matching hit - * @param luceneQueryScore the score that lucene's text query computed for this hit - */ - public float score(int internalDocID, float luceneQueryScore) throws IOException { - setCurrentDocID(internalDocID); - return score(luceneQueryScore); - } - - /** - * Compute the score for the current hit. This is not expected to be thread safe. - * - * @param luceneQueryScore the score that lucene's text query computed for this hit - */ - protected abstract float score(float luceneQueryScore) throws IOException; - - /** Returns an explanation for the given hit. */ - public final Explanation explain(IndexReader reader, int internalDocID, float luceneScore) - throws IOException { - setNextReader((EarlybirdIndexSegmentAtomicReader) reader); - setCurrentDocID(internalDocID); - return doExplain(luceneScore); - } - - /** Returns an explanation for the current document. */ - protected abstract Explanation doExplain(float luceneScore) throws IOException; - - /** - * Returns the scoring metadata for the current doc ID. - */ - public ThriftSearchResultMetadata getResultMetadata(ThriftSearchResultMetadataOptions options) - throws IOException { - ThriftSearchResultMetadata metadata = new ThriftSearchResultMetadata(); - metadata.setResultType(ThriftSearchResultType.RELEVANCE); - metadata.setPenguinVersion(EarlybirdConfig.getPenguinVersionByte()); - metadata.setLanguage(ThriftLanguage.findByValue( - (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.LANGUAGE))); - metadata.setSignature( - (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.TWEET_SIGNATURE)); - metadata.setIsNullcast(documentFeatures.isFlagSet(EarlybirdFieldConstant.IS_NULLCAST_FLAG)); - return metadata; - } - - /** - * Updates the given ThriftSearchResultsRelevanceStats instance based on the scoring metadata for - * the current doc ID. - */ - public abstract void updateRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats); - - /** - * Score a list of hits. Not thread safe. - */ - public float[] batchScore(List hits) throws IOException { - throw new UnsupportedOperationException("This operation (batchScore) is not implemented!"); - } - - /** - * Collect the features and CSFs for the current document. Used for scoring and generating the - * returned metadata. - */ - public Pair collectFeatures( - float luceneQueryScore) throws IOException { - throw new UnsupportedOperationException("This operation (collectFeatures) is not implemented!"); - } - - /** - * Implement this function to populate the result metadata based on the given scoring data. - * Otherwise, this is a no-op. - * - * Scoring functions that implement this should also implement getScoringData(). - */ - public void populateResultMetadataBasedOnScoringData( - ThriftSearchResultMetadataOptions options, - ThriftSearchResultMetadata metadata, - LinearScoringData data) throws IOException { - // Make sure that the scoring data passed in is null because getScoringDataForCurrentDocument() - // returns null by default and if a subclass overrides one of these two methods, it should - // override both. - Preconditions.checkState(data == null, "LinearScoringData should be null"); - } - - /** - * This should only be called at hit collection time because it relies on the internal doc id. - * - * Scoring functions that implement this should also implement the function - * populateResultMetadataBasedOnScoringData(). - */ - public LinearScoringData getScoringDataForCurrentDocument() { - return null; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunctionProvider.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunctionProvider.docx new file mode 100644 index 000000000..9a893ee33 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunctionProvider.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunctionProvider.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunctionProvider.java deleted file mode 100644 index 5e264e8f8..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/ScoringFunctionProvider.java +++ /dev/null @@ -1,216 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.query.HitAttributeHelper; -import com.twitter.search.common.ranking.thriftjava.ThriftRankingParams; -import com.twitter.search.common.ranking.thriftjava.ThriftScoringFunctionType; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.util.ml.tensorflow_engine.TensorflowModelsManager; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.exception.ClientException; -import com.twitter.search.earlybird.ml.ScoringModelsManager; -import com.twitter.search.earlybird.search.AntiGamingFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; -import com.twitter.search.queryparser.query.Query; - -/** - * Returns a scoring function for a particular experiment ID. - * - * Can be used for a/b testing of different scoring formulas. - */ -public abstract class ScoringFunctionProvider { - private static final Logger LOG = LoggerFactory.getLogger(ScoringFunctionProvider.class); - - /** - * Returns the scoring function. - */ - public abstract ScoringFunction getScoringFunction() throws IOException, ClientException; - - public static final String RETWEETS_SCORER_NAME = "retweets"; - public static final String NO_SPAM_SCORER_NAME = "no_spam"; - public static final String TEST_SCORER_NAME = "test"; - - // Whether to avoid time decay when scoring top tweets. - // Top archive does not need time decay. - private static final boolean TOP_TWEET_WITH_DECAY = - EarlybirdConfig.getBool("top_tweet_scoring_with_decay", true); - - /** - * Abstract class that can be used for ScoringFunctions that don't throw a ClientException. - * - * It does throw an IOException but it doesn't throw a ClientException so the name can be a bit - * misleading. - */ - public abstract static class NamedScoringFunctionProvider extends ScoringFunctionProvider { - /** - * Returns the scoring function. - */ - public abstract ScoringFunction getScoringFunction() throws IOException; - } - - /** - * Returns the scoring function provider with the given name, or null if no such provider exists. - */ - public static NamedScoringFunctionProvider getScoringFunctionProviderByName( - String name, final ImmutableSchemaInterface schema) { - if (name.equals(NO_SPAM_SCORER_NAME)) { - return new NamedScoringFunctionProvider() { - @Override - public ScoringFunction getScoringFunction() throws IOException { - return new SpamVectorScoringFunction(schema); - } - }; - } else if (name.equals(RETWEETS_SCORER_NAME)) { - return new NamedScoringFunctionProvider() { - @Override - public ScoringFunction getScoringFunction() throws IOException { - // Production top tweet actually uses this. - if (TOP_TWEET_WITH_DECAY) { - return new RetweetBasedTopTweetsScoringFunction(schema); - } else { - return new RetweetBasedTopTweetsScoringFunction(schema, true); - } - } - }; - } else if (name.equals(TEST_SCORER_NAME)) { - return new NamedScoringFunctionProvider() { - @Override - public ScoringFunction getScoringFunction() throws IOException { - return new TestScoringFunction(schema); - } - }; - } - return null; - } - - /** - * Returns default scoring functions for different scoring function type - * and provides fallback behavior if model-based scoring function fails - */ - public static class DefaultScoringFunctionProvider extends ScoringFunctionProvider { - private final EarlybirdRequest request; - private final ImmutableSchemaInterface schema; - private final ThriftSearchQuery searchQuery; - private final AntiGamingFilter antiGamingFilter; - private final UserTable userTable; - private final HitAttributeHelper hitAttributeHelper; - private final Query parsedQuery; - private final ScoringModelsManager scoringModelsManager; - private final TensorflowModelsManager tensorflowModelsManager; - - private static final SearchCounter MODEL_BASED_SCORING_FUNCTION_CREATED = - SearchCounter.export("model_based_scoring_function_created"); - private static final SearchCounter MODEL_BASED_FALLBACK_TO_LINEAR_SCORING_FUNCTION = - SearchCounter.export("model_based_fallback_to_linear_scoring_function"); - - private static final SearchCounter TENSORFLOW_BASED_SCORING_FUNCTION_CREATED = - SearchCounter.export("tensorflow_based_scoring_function_created"); - private static final SearchCounter TENSORFLOW_BASED_FALLBACK_TO_LINEAR_SCORING_FUNCTION = - SearchCounter.export("tensorflow_fallback_to_linear_function_scoring_function"); - - public DefaultScoringFunctionProvider( - final EarlybirdRequest request, - final ImmutableSchemaInterface schema, - final ThriftSearchQuery searchQuery, - final AntiGamingFilter antiGamingFilter, - final UserTable userTable, - final HitAttributeHelper hitAttributeHelper, - final Query parsedQuery, - final ScoringModelsManager scoringModelsManager, - final TensorflowModelsManager tensorflowModelsManager) { - this.request = request; - this.schema = schema; - this.searchQuery = searchQuery; - this.antiGamingFilter = antiGamingFilter; - this.userTable = userTable; - this.hitAttributeHelper = hitAttributeHelper; - this.parsedQuery = parsedQuery; - this.scoringModelsManager = scoringModelsManager; - this.tensorflowModelsManager = tensorflowModelsManager; - } - - @Override - public ScoringFunction getScoringFunction() throws IOException, ClientException { - if (searchQuery.isSetRelevanceOptions() - && searchQuery.getRelevanceOptions().isSetRankingParams()) { - ThriftRankingParams params = searchQuery.getRelevanceOptions().getRankingParams(); - ThriftScoringFunctionType type = params.isSetType() - ? params.getType() : ThriftScoringFunctionType.LINEAR; // default type - switch (type) { - case LINEAR: - return createLinear(); - case MODEL_BASED: - if (scoringModelsManager.isEnabled()) { - MODEL_BASED_SCORING_FUNCTION_CREATED.increment(); - return createModelBased(); - } else { - // From ScoringModelsManager.NO_OP_MANAGER. Fall back to LinearScoringFunction - MODEL_BASED_FALLBACK_TO_LINEAR_SCORING_FUNCTION.increment(); - return createLinear(); - } - case TENSORFLOW_BASED: - if (tensorflowModelsManager.isEnabled()) { - TENSORFLOW_BASED_SCORING_FUNCTION_CREATED.increment(); - return createTensorflowBased(); - } else { - // Fallback to linear scoring if tf manager is disabled - TENSORFLOW_BASED_FALLBACK_TO_LINEAR_SCORING_FUNCTION.increment(); - return createLinear(); - } - case TOPTWEETS: - return createTopTweets(); - default: - throw new IllegalArgumentException("Unknown scoring type: in " + searchQuery); - } - } else { - LOG.error("No relevance options provided query = " + searchQuery); - return new DefaultScoringFunction(schema); - } - } - - private ScoringFunction createLinear() throws IOException { - LinearScoringFunction scoringFunction = new LinearScoringFunction( - schema, searchQuery, antiGamingFilter, ThriftSearchResultType.RELEVANCE, - userTable); - scoringFunction.setHitAttributeHelperAndQuery(hitAttributeHelper, parsedQuery); - - return scoringFunction; - } - - /** - * For model based scoring function, ClientException will be throw if client selects an - * unknown model for scoring manager. - * {@link com.twitter.search.earlybird.search.relevance.scoring.ModelBasedScoringFunction} - */ - private ScoringFunction createModelBased() throws IOException, ClientException { - ModelBasedScoringFunction scoringFunction = new ModelBasedScoringFunction( - schema, searchQuery, antiGamingFilter, ThriftSearchResultType.RELEVANCE, userTable, - scoringModelsManager); - scoringFunction.setHitAttributeHelperAndQuery(hitAttributeHelper, parsedQuery); - - return scoringFunction; - } - - private ScoringFunction createTopTweets() throws IOException { - return new LinearScoringFunction( - schema, searchQuery, antiGamingFilter, ThriftSearchResultType.POPULAR, userTable); - } - - private TensorflowBasedScoringFunction createTensorflowBased() - throws IOException, ClientException { - TensorflowBasedScoringFunction tfScoringFunction = new TensorflowBasedScoringFunction( - request, schema, searchQuery, antiGamingFilter, - ThriftSearchResultType.RELEVANCE, userTable, tensorflowModelsManager); - tfScoringFunction.setHitAttributeHelperAndQuery(hitAttributeHelper, parsedQuery); - return tfScoringFunction; - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/SpamVectorScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/SpamVectorScoringFunction.docx new file mode 100644 index 000000000..ce2d48a17 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/SpamVectorScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/SpamVectorScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/SpamVectorScoringFunction.java deleted file mode 100644 index 1d45ad642..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/SpamVectorScoringFunction.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.lucene.search.Explanation; - -import com.twitter.search.common.relevance.features.RelevanceSignalConstants; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -public class SpamVectorScoringFunction extends ScoringFunction { - private static final int MIN_TWEEPCRED_WITH_LINK = - EarlybirdConfig.getInt("min_tweepcred_with_non_whitelisted_link", 25); - - // The engagement threshold that prevents us from filtering users with low tweepcred. - private static final int ENGAGEMENTS_NO_FILTER = 1; - - @VisibleForTesting - static final float NOT_SPAM_SCORE = 0.5f; - @VisibleForTesting - static final float SPAM_SCORE = -0.5f; - - public SpamVectorScoringFunction(ImmutableSchemaInterface schema) { - super(schema); - } - - @Override - protected float score(float luceneQueryScore) throws IOException { - if (documentFeatures.isFlagSet(EarlybirdFieldConstant.FROM_VERIFIED_ACCOUNT_FLAG)) { - return NOT_SPAM_SCORE; - } - - int tweepCredThreshold = 0; - if (documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_LINK_FLAG) - && !documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_IMAGE_URL_FLAG) - && !documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_VIDEO_URL_FLAG) - && !documentFeatures.isFlagSet(EarlybirdFieldConstant.HAS_NEWS_URL_FLAG)) { - // Contains a non-media non-news link, definite spam vector. - tweepCredThreshold = MIN_TWEEPCRED_WITH_LINK; - } - - int tweepcred = (int) documentFeatures.getFeatureValue(EarlybirdFieldConstant.USER_REPUTATION); - - // For new user, tweepcred is set to a sentinel value of -128, specified at - // src/thrift/com/twitter/search/common/indexing/status.thrift - if (tweepcred >= tweepCredThreshold - || tweepcred == (int) RelevanceSignalConstants.UNSET_REPUTATION_SENTINEL) { - return NOT_SPAM_SCORE; - } - - double retweetCount = - documentFeatures.getUnnormalizedFeatureValue(EarlybirdFieldConstant.RETWEET_COUNT); - double replyCount = - documentFeatures.getUnnormalizedFeatureValue(EarlybirdFieldConstant.REPLY_COUNT); - double favoriteCount = - documentFeatures.getUnnormalizedFeatureValue(EarlybirdFieldConstant.FAVORITE_COUNT); - - // If the tweet has enough engagements, do not mark it as spam. - if (retweetCount + replyCount + favoriteCount >= ENGAGEMENTS_NO_FILTER) { - return NOT_SPAM_SCORE; - } - - return SPAM_SCORE; - } - - @Override - protected Explanation doExplain(float luceneScore) { - return null; - } - - @Override - public ThriftSearchResultMetadata getResultMetadata(ThriftSearchResultMetadataOptions options) { - return null; - } - - @Override - public void updateRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats) { - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/SparseTensor.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/SparseTensor.docx new file mode 100644 index 000000000..8eccee1e7 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/SparseTensor.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/SparseTensor.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/SparseTensor.java deleted file mode 100644 index 67df06d95..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/SparseTensor.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -// Ideally, this part should live somewhere in the Cortex common -// code. Today, it is not possible to create -// a `SparseTensor` that relies only on ByteBuffer. -public class SparseTensor { - - private ByteBuffer sparseIndices; - private ByteBuffer sparseValues; - private ByteBuffer sparseShape; - - private int numDocs; - private final long[] sparseShapeShapeDimension = new long[] {2L}; - private final long inputBitSize = 1 << 63; - - private long numRecordsSeen = 0; - private final long numFeatures; - private int numValuesSeen; - - public SparseTensor(int numDocs, int numFeatures) { - this.numDocs = numDocs; - this.numFeatures = (long) numFeatures; - this.sparseValues = - ByteBuffer - .allocate(numFeatures * numDocs * Float.BYTES) - .order(ByteOrder.LITTLE_ENDIAN); - this.sparseIndices = - ByteBuffer - .allocate(2 * numFeatures * numDocs * Long.BYTES) - .order(ByteOrder.LITTLE_ENDIAN); - this.sparseShape = - ByteBuffer - .allocate(2 * Long.BYTES) - .order(ByteOrder.LITTLE_ENDIAN); - } - - public void incNumRecordsSeen() { - numRecordsSeen++; - } - - /** - * Adds the given value to this tensor. - */ - public void addValue(long featureId, float value) { - sparseValues.putFloat(value); - sparseIndices.putLong(numRecordsSeen); - sparseIndices.putLong(featureId); - numValuesSeen++; - } - - public ByteBuffer getSparseValues() { - sparseValues.limit(numValuesSeen * Float.BYTES); - sparseValues.rewind(); - return sparseValues; - } - - public long[] getSparseValuesShape() { - return new long[] {numValuesSeen}; - } - - public long[] getSparseIndicesShape() { - return new long[] {numValuesSeen, 2L}; - } - - public long[] getSparseShapeShape() { - return sparseShapeShapeDimension; - } - - public ByteBuffer getSparseIndices() { - sparseIndices.limit(2 * numValuesSeen * Long.BYTES); - sparseIndices.rewind(); - return sparseIndices; - } - - /** - * Returns the sparse shape for this tensor. - */ - public ByteBuffer getSparseShape() { - sparseShape.putLong(numRecordsSeen); - sparseShape.putLong(inputBitSize); - sparseShape.rewind(); - return sparseShape; - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/TensorflowBasedScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/TensorflowBasedScoringFunction.docx new file mode 100644 index 000000000..3f4aa784e Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/TensorflowBasedScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/TensorflowBasedScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/TensorflowBasedScoringFunction.java deleted file mode 100644 index 497f4bbc0..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/TensorflowBasedScoringFunction.java +++ /dev/null @@ -1,339 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import java.io.IOException; -import java.nio.FloatBuffer; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; - -import org.apache.lucene.search.Explanation; -import org.tensorflow.Tensor; - -import com.twitter.common.collections.Pair; -import com.twitter.search.common.constants.thriftjava.ThriftQuerySource; -import com.twitter.search.common.features.EarlybirdRankingDerivedFeature; -import com.twitter.search.common.features.FeatureHandler; -import com.twitter.search.common.features.thrift.ThriftSearchResultFeatures; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.util.ml.tensorflow_engine.TensorflowModelsManager; -import com.twitter.search.earlybird.EarlybirdSearcher; -import com.twitter.search.earlybird.common.userupdates.UserTable; -import com.twitter.search.earlybird.exception.ClientException; -import com.twitter.search.earlybird.search.AntiGamingFilter; -import com.twitter.search.earlybird.search.relevance.LinearScoringData; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchRelevanceOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; -import com.twitter.search.modeling.common.TweetFeaturesUtils; -import com.twitter.tfcompute_java.TFModelRunner; - -/** - * TensorflowBasedScoringFunction relies on a TF model for scoring tweets - * Only the `batchScore` part is implemented - */ -public class TensorflowBasedScoringFunction extends FeatureBasedScoringFunction { - private final TFModelRunner tfModelRunner; - - // https://stackoverflow.com/questions/37849322/how-to-understand-the-term-tensor-in-tensorflow - // for more information on this notation - in short, a TF graph is made - // of TF operations and doesn't have a first order notion of tensors - // The notation : will maps to the output of the - // contained in the TF graph. - private static final String INPUT_VALUES = "input_sparse_tensor_values:0"; - private static final String INPUT_INDICES = "input_sparse_tensor_indices:0"; - private static final String INPUT_SHAPE = "input_sparse_tensor_shape:0"; - private static final String OUTPUT_NODE = "output_scores:0"; - - private final Map featureSchemaIdToMlApiId; - private final Map tweetIdToScoreMap = new HashMap<>(); - private final EarlybirdRequest request; - - public TensorflowBasedScoringFunction( - EarlybirdRequest request, - ImmutableSchemaInterface schema, - ThriftSearchQuery searchQuery, - AntiGamingFilter antiGamingFilter, - ThriftSearchResultType searchResultType, - UserTable userTable, - TensorflowModelsManager tensorflowModelsManager - ) throws IOException, ClientException { - super( - "TensorflowBasedScoringFunction", - schema, - searchQuery, - antiGamingFilter, - searchResultType, - userTable - ); - this.request = request; - String modelName = searchQuery.getRelevanceOptions().getRankingParams().selectedTensorflowModel; - this.featureSchemaIdToMlApiId = tensorflowModelsManager.getFeatureSchemaIdToMlApiId(); - - if (modelName == null) { - throw new ClientException("Scoring type is TENSORFLOW_BASED but no model was selected"); - } else if (!tensorflowModelsManager.getModel(modelName).isPresent()) { - throw new ClientException( - "Scoring type is TENSORFLOW_BASED. Model " - + modelName - + " is not present." - ); - } - - if (searchQuery.getRelevanceOptions().getRankingParams().isEnableHitDemotion()) { - throw new ClientException( - "Hit attribute demotion is not supported with TENSORFLOW_BASED scoring type"); - } - - tfModelRunner = tensorflowModelsManager.getModel(modelName).get(); - } - - /** - * Single item scoring just returns the lucene score to be used during the batching phase. - */ - @Override - protected float score(float luceneQueryScore) { - return luceneQueryScore; - } - - @Override - public Pair collectFeatures( - float luceneQueryScore) throws IOException { - LinearScoringData linearScoringData = updateLinearScoringData(luceneQueryScore); - ThriftSearchResultFeatures features = - createFeaturesForDocument(linearScoringData, true).getFeatures(); - - return new Pair<>(linearScoringData, features); - } - - @Override - protected FeatureHandler createFeaturesForDocument( - LinearScoringData linearScoringData, - boolean ignoreDefaultValues) throws IOException { - return super.createFeaturesForDocument(linearScoringData, - ignoreDefaultValues) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_TREND_CLICK, - request.querySource == ThriftQuerySource.TREND_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_TYPED_QUERY, - request.querySource == ThriftQuerySource.TYPED_QUERY) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_TYPEAHEAD_CLICK, - request.querySource == ThriftQuerySource.TYPEAHEAD_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_HASHTAG_CLICK, - request.querySource == ThriftQuerySource.RECENT_SEARCH_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_RECENT_SEARCH_CLICK, - request.querySource == ThriftQuerySource.RECENT_SEARCH_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_PROFILE_CLICK, - request.querySource == ThriftQuerySource.PROFILE_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_API_CALL, - request.querySource == ThriftQuerySource.API_CALL) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_PROMOTED_TREND_CLICK, - request.querySource == ThriftQuerySource.PROMOTED_TREND_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_SAVED_SEARCH_CLICK, - request.querySource == ThriftQuerySource.SAVED_SEARCH_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_CASHTAG_CLICK, - request.querySource == ThriftQuerySource.CASHTAG_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_SPELLING_EXPANSION_REVERT_CLICK, - request.querySource == ThriftQuerySource.SPELLING_EXPANSION_REVERT_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_SPELLING_SUGGESTION_CLICK, - request.querySource == ThriftQuerySource.SPELLING_SUGGESTION_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_LOGGED_OUT_HOME_TREND_CLICK, - request.querySource == ThriftQuerySource.LOGGED_OUT_HOME_TREND_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_RELATED_QUERY_CLICK, - request.querySource == ThriftQuerySource.RELATED_QUERY_CLICK) - .addBoolean(EarlybirdRankingDerivedFeature.QUERY_SOURCE_AUTO_SPELL_CORRECT_REVERT_CLICK, - request.querySource == ThriftQuerySource.AUTO_SPELL_CORRECT_REVERT_CLICK); - } - - /** - * Return scores computed in batchScore() if forExplanation is true. - */ - @Override - protected double computeScore(LinearScoringData data, boolean forExplanation) { - Preconditions.checkState(forExplanation, - "forExplanation is false. computeScore() should only be used for explanation creation"); - return tweetIdToScoreMap.get(tweetIDMapper.getTweetID(getCurrentDocID())); - } - - @Override - protected void generateExplanationForScoring( - LinearScoringData scoringData, boolean isHit, List details) { - } - - @VisibleForTesting - SparseTensor createInputTensor(ThriftSearchResultFeatures[] featuresForDocs) { - // Moving this across outside of the request path - // would reduce the allocation cost and make the `ByteBuffer`s - // long lived - would need one per thread. - SparseTensor sparseTensor = - new SparseTensor(featuresForDocs.length, featureSchemaIdToMlApiId.size()); - for (ThriftSearchResultFeatures features : featuresForDocs) { - updateSparseTensor(sparseTensor, features); - } - return sparseTensor; - } - - private void addSchemaBooleanFeatures(SparseTensor sparseTensor, - Map booleanMap) { - if (booleanMap == null || booleanMap.isEmpty()) { - return; - } - for (Map.Entry entry : booleanMap.entrySet()) { - Preconditions.checkState(featureSchemaIdToMlApiId.containsKey(entry.getKey())); - sparseTensor.addValue( - featureSchemaIdToMlApiId.get(entry.getKey()), entry.getValue() ? 1f : 0f); - } - } - - private void addSchemaContinuousFeatures(SparseTensor sparseTensor, - Map valueMap) { - if (valueMap == null || valueMap.isEmpty()) { - return; - } - for (Map.Entry entry : valueMap.entrySet()) { - Integer id = entry.getKey(); - // SEARCH-26795 - if (!TweetFeaturesUtils.isFeatureDiscrete(id)) { - Preconditions.checkState(featureSchemaIdToMlApiId.containsKey(id)); - sparseTensor.addValue( - featureSchemaIdToMlApiId.get(id), entry.getValue().floatValue()); - } - } - } - - private void updateSparseTensor(SparseTensor sparseTensor, ThriftSearchResultFeatures features) { - addSchemaBooleanFeatures(sparseTensor, features.getBoolValues()); - addSchemaContinuousFeatures(sparseTensor, features.getIntValues()); - addSchemaContinuousFeatures(sparseTensor, features.getLongValues()); - addSchemaContinuousFeatures(sparseTensor, features.getDoubleValues()); - - sparseTensor.incNumRecordsSeen(); - } - - private float[] batchScoreInternal(ThriftSearchResultFeatures[] featuresForDocs) { - int nbDocs = featuresForDocs.length; - float[] backingArrayResults = new float[nbDocs]; - SparseTensor sparseTensor = createInputTensor(featuresForDocs); - Tensor sparseValues = - Tensor.create( - Float.class, - sparseTensor.getSparseValuesShape(), - sparseTensor.getSparseValues()); - Tensor sparseIndices = - Tensor.create( - Long.class, - sparseTensor.getSparseIndicesShape(), - sparseTensor.getSparseIndices()); - Tensor sparseShape = - Tensor.create( - Long.class, - sparseTensor.getSparseShapeShape(), - sparseTensor.getSparseShape()); - Map> inputMap = ImmutableMap.of( - INPUT_VALUES, sparseValues, - INPUT_INDICES, sparseIndices, - INPUT_SHAPE, sparseShape - ); - List output = ImmutableList.of(OUTPUT_NODE); - - Map> outputs = tfModelRunner.run( - inputMap, - output, - ImmutableList.of() - ); - Tensor outputTensor = outputs.get(OUTPUT_NODE); - try { - FloatBuffer finalResultBuffer = - FloatBuffer.wrap(backingArrayResults, 0, nbDocs); - - outputTensor.writeTo(finalResultBuffer); - } finally { - // Close tensors to avoid memory leaks - sparseValues.close(); - sparseIndices.close(); - sparseShape.close(); - if (outputTensor != null) { - outputTensor.close(); - } - } - return backingArrayResults; - } - - /** - * Compute the score for a list of hits. Not thread safe. - * @return Array of scores - */ - @Override - public float[] batchScore(List hits) throws IOException { - ThriftSearchResultFeatures[] featuresForDocs = new ThriftSearchResultFeatures[hits.size()]; - - for (int i = 0; i < hits.size(); i++) { - // This is a gigantic allocation, but the models are trained to depend on unset values having - // a default. - BatchHit hit = hits.get(i); - ThriftSearchResultFeatures features = hit.getFeatures().deepCopy(); - - // Adjust features of a hit based on overrides provided by relevance options. Should mostly - // be used for debugging purposes. - adjustHitScoringFeatures(hit, features); - - setDefaultFeatureValues(features); - featuresForDocs[i] = features; - } - - float[] scores = batchScoreInternal(featuresForDocs); - float[] finalScores = new float[hits.size()]; - - for (int i = 0; i < hits.size(); i++) { - LinearScoringData data = hits.get(i).getScoringData(); - if (data.skipReason != null && data.skipReason != LinearScoringData.SkipReason.NOT_SKIPPED) { - // If the hit should be skipped, overwrite the score with SKIP_HIT - scores[i] = SKIP_HIT; - } - - // If explanations enabled, Add scores to map. Will be used in computeScore() - if (EarlybirdSearcher.explanationsEnabled(debugMode)) { - tweetIdToScoreMap.put(hits.get(i).getTweetID(), scores[i]); - } - - finalScores[i] = postScoreComputation( - data, - scores[i], - false, // cannot get the hit attribution info for this hit at this point in time - null); - } - return finalScores; - } - - private void adjustHitScoringFeatures(BatchHit hit, ThriftSearchResultFeatures features) { - - if (request.isSetSearchQuery() && request.getSearchQuery().isSetRelevanceOptions()) { - ThriftSearchRelevanceOptions relevanceOptions = - request.getSearchQuery().getRelevanceOptions(); - - if (relevanceOptions.isSetPerTweetFeaturesOverride() - && relevanceOptions.getPerTweetFeaturesOverride().containsKey(hit.getTweetID())) { - overrideFeatureValues( - features, - relevanceOptions.getPerTweetFeaturesOverride().get(hit.getTweetID())); - } - - if (relevanceOptions.isSetPerUserFeaturesOverride() - && relevanceOptions.getPerUserFeaturesOverride().containsKey( - hit.getScoringData().fromUserId)) { - overrideFeatureValues( - features, - relevanceOptions.getPerUserFeaturesOverride().get(hit.getScoringData().fromUserId)); - } - - if (relevanceOptions.isSetGlobalFeaturesOverride()) { - overrideFeatureValues( - features, relevanceOptions.getGlobalFeaturesOverride()); - } - } - } -} diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/TestScoringFunction.docx b/src/java/com/twitter/search/earlybird/search/relevance/scoring/TestScoringFunction.docx new file mode 100644 index 000000000..57f2552e3 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/search/relevance/scoring/TestScoringFunction.docx differ diff --git a/src/java/com/twitter/search/earlybird/search/relevance/scoring/TestScoringFunction.java b/src/java/com/twitter/search/earlybird/search/relevance/scoring/TestScoringFunction.java deleted file mode 100644 index 6e0c6a36f..000000000 --- a/src/java/com/twitter/search/earlybird/search/relevance/scoring/TestScoringFunction.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.search.earlybird.search.relevance.scoring; - -import org.apache.lucene.search.Explanation; - -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadataOptions; -import com.twitter.search.earlybird.thrift.ThriftSearchResultType; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -/** - * A dummy scoring function for test, the score is always tweetId/10000.0 - * Since score_filter: operator requires all score to be between [0, 1], if you want to use this - * with it, don't use any tweet id larger than 10000 in your test. - */ -public class TestScoringFunction extends ScoringFunction { - private ThriftSearchResultMetadata metadata = null; - private float score; - - public TestScoringFunction(ImmutableSchemaInterface schema) { - super(schema); - } - - @Override - protected float score(float luceneQueryScore) { - long tweetId = tweetIDMapper.getTweetID(getCurrentDocID()); - this.score = (float) (tweetId / 10000.0); - System.out.println(String.format("score for tweet %10d is %6.3f", tweetId, score)); - return this.score; - } - - @Override - protected Explanation doExplain(float luceneScore) { - return null; - } - - @Override - public ThriftSearchResultMetadata getResultMetadata(ThriftSearchResultMetadataOptions options) { - if (metadata == null) { - metadata = new ThriftSearchResultMetadata() - .setResultType(ThriftSearchResultType.RELEVANCE) - .setPenguinVersion(EarlybirdConfig.getPenguinVersionByte()); - metadata.setScore(score); - } - return metadata; - } - - @Override - public void updateRelevanceStats(ThriftSearchResultsRelevanceStats relevanceStats) { - } -} diff --git a/src/java/com/twitter/search/earlybird/segment/DLSegmentDataProvider.docx b/src/java/com/twitter/search/earlybird/segment/DLSegmentDataProvider.docx new file mode 100644 index 000000000..5eb7b96a7 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/segment/DLSegmentDataProvider.docx differ diff --git a/src/java/com/twitter/search/earlybird/segment/DLSegmentDataProvider.java b/src/java/com/twitter/search/earlybird/segment/DLSegmentDataProvider.java deleted file mode 100644 index db3afc3cf..000000000 --- a/src/java/com/twitter/search/earlybird/segment/DLSegmentDataProvider.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.search.earlybird.segment; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Set; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.partitioning.base.Segment; -import com.twitter.search.common.util.io.dl.DLReaderWriterFactory; -import com.twitter.search.common.util.io.dl.SegmentDLUtil; -import com.twitter.search.earlybird.EarlybirdIndexConfig; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; - -/** - * An implementation of SegmentDataProvider using DistributedLog. - */ -public class DLSegmentDataProvider implements SegmentDataProvider { - private final int hashPartitionID; - private final DLReaderWriterFactory dlFactory; - private final SegmentDataReaderSet readerSet; - - public DLSegmentDataProvider( - int hashPartitionID, - EarlybirdIndexConfig earlybirdIndexConfig, - DLReaderWriterFactory dlReaderWriterFactory) throws IOException { - this(hashPartitionID, earlybirdIndexConfig, dlReaderWriterFactory, - Clock.SYSTEM_CLOCK); - } - - public DLSegmentDataProvider( - int hashPartitionID, - EarlybirdIndexConfig earlybirdIndexConfig, - DLReaderWriterFactory dlReaderWriterFactory, - Clock clock) throws IOException { - this.hashPartitionID = hashPartitionID; - this.dlFactory = dlReaderWriterFactory; - this.readerSet = new DLSegmentDataReaderSet( - dlFactory, - earlybirdIndexConfig, - clock); - } - - @Override - public SegmentDataReaderSet getSegmentDataReaderSet() { - return readerSet; - } - - @Override - public List newSegmentList() throws IOException { - Set segmentNames = SegmentDLUtil.getSegmentNames(dlFactory, null, hashPartitionID); - List segmentList = new ArrayList<>(segmentNames.size()); - for (String segmentName : segmentNames) { - Segment segment = Segment.fromSegmentName(segmentName, EarlybirdConfig.getMaxSegmentSize()); - segmentList.add(segment); - } - // Sort the segments by ID. - Collections.sort(segmentList); - return segmentList; - } -} diff --git a/src/java/com/twitter/search/earlybird/segment/DLSegmentDataReaderSet.docx b/src/java/com/twitter/search/earlybird/segment/DLSegmentDataReaderSet.docx new file mode 100644 index 000000000..faf17f308 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/segment/DLSegmentDataReaderSet.docx differ diff --git a/src/java/com/twitter/search/earlybird/segment/DLSegmentDataReaderSet.java b/src/java/com/twitter/search/earlybird/segment/DLSegmentDataReaderSet.java deleted file mode 100644 index 88aa02a5c..000000000 --- a/src/java/com/twitter/search/earlybird/segment/DLSegmentDataReaderSet.java +++ /dev/null @@ -1,237 +0,0 @@ -package com.twitter.search.earlybird.segment; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.TimeUnit; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; - -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.indexing.thriftjava.ThriftVersionedEvents; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchCustomGauge; -import com.twitter.search.common.metrics.SearchRequestStats; -import com.twitter.search.common.schema.earlybird.EarlybirdThriftDocumentUtil; -import com.twitter.search.common.schema.thriftjava.ThriftIndexingEvent; -import com.twitter.search.common.util.io.ReaderWithStatsFactory; -import com.twitter.search.common.util.io.TransformingRecordReader; -import com.twitter.search.common.util.io.dl.DLMultiStreamReader; -import com.twitter.search.common.util.io.dl.DLReaderWriterFactory; -import com.twitter.search.common.util.io.dl.DLTimestampedReaderFactory; -import com.twitter.search.common.util.io.dl.SegmentDLUtil; -import com.twitter.search.common.util.io.recordreader.RecordReader; -import com.twitter.search.common.util.io.recordreader.RecordReaderFactory; -import com.twitter.search.common.util.thrift.ThriftUtils; -import com.twitter.search.earlybird.EarlybirdIndexConfig; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.document.DocumentFactory; -import com.twitter.search.earlybird.document.TweetDocument; -import com.twitter.search.earlybird.partition.SegmentInfo; - -public class DLSegmentDataReaderSet implements SegmentDataReaderSet { - private static final Logger LOG = LoggerFactory.getLogger(DLSegmentDataReaderSet.class); - - public static final SearchRequestStats STATUS_DL_READ_STATS = - SearchRequestStats.export("status_dlreader", TimeUnit.MICROSECONDS, false); - private static final SearchRequestStats UPDATE_EVENT_DL_READ_STATS = - SearchRequestStats.export("update_events_dlreader", TimeUnit.MICROSECONDS, false); - // The number of tweets not indexed because they failed deserialization. - private static final SearchCounter STATUS_SKIPPED_DUE_TO_FAILED_DESERIALIZATION_COUNTER = - SearchCounter.export("statuses_skipped_due_to_failed_deserialization"); - - @VisibleForTesting - public static final int FRESH_READ_THRESHOLD = (int) TimeUnit.MINUTES.toMillis(1); - - private final int documentReadFreshnessThreshold = - EarlybirdConfig.getInt("documents_reader_freshness_threshold_millis", 10000); - private final int updateReadFreshnessThreshold = - EarlybirdConfig.getInt("updates_freshness_threshold_millis", FRESH_READ_THRESHOLD); - private final int dlReaderVersion = EarlybirdConfig.getInt("dl_reader_version"); - - private final DLReaderWriterFactory dlFactory; - private final RecordReaderFactory dlUpdateEventsFactory; - private final EarlybirdIndexConfig indexConfig; - private final Clock clock; - - private RecordReader documentReader; - - // RecordReaders for update events that span all live segments. - private final RecordReader updateEventsReader; - private final DLMultiStreamReader updateEventsMultiReader; - private final Map> updateEventReaders = new HashMap<>(); - - DLSegmentDataReaderSet( - DLReaderWriterFactory dlFactory, - final EarlybirdIndexConfig indexConfig, - Clock clock) throws IOException { - this.dlFactory = dlFactory; - this.indexConfig = indexConfig; - this.clock = clock; - - this.dlUpdateEventsFactory = new ReaderWithStatsFactory( - new DLTimestampedReaderFactory(dlFactory, clock, updateReadFreshnessThreshold), - UPDATE_EVENT_DL_READ_STATS); - this.updateEventsMultiReader = - new DLMultiStreamReader("update_events", dlUpdateEventsFactory, true, clock); - this.updateEventsReader = - new TransformingRecordReader<>(updateEventsMultiReader, record -> - (record != null) ? deserializeTVE(record.getBytes()) : null); - - SearchCustomGauge.export("open_dl_update_events_streams", updateEventReaders::size); - } - - private ThriftVersionedEvents deserializeTVE(byte[] bytes) { - ThriftVersionedEvents event = new ThriftVersionedEvents(); - try { - ThriftUtils.fromCompactBinaryFormat(bytes, event); - return event; - } catch (TException e) { - LOG.error("error deserializing TVE", e); - return null; - } - } - - @Override - public void attachDocumentReaders(SegmentInfo segmentInfo) throws IOException { - // Close any document reader left open before. - if (documentReader != null) { - LOG.warn("Previous documentReader not closed: {}", documentReader); - completeSegmentDocs(segmentInfo); - } - documentReader = newDocumentReader(segmentInfo); - } - - @Override - public void attachUpdateReaders(SegmentInfo segmentInfo) throws IOException { - if (updateEventsMultiReader == null) { - return; - } - - String segmentName = segmentInfo.getSegmentName(); - if (getUpdateEventsReaderForSegment(segmentInfo) != null) { - LOG.info("Update events reader for segment {} is already attached.", segmentName); - return; - } - - long updateEventStreamOffsetTimestamp = segmentInfo.getUpdatesStreamOffsetTimestamp(); - LOG.info("Attaching update events reader for segment {} with timestamp: {}.", - segmentName, updateEventStreamOffsetTimestamp); - - String topic = SegmentDLUtil.getDLTopicForUpdateEvents(segmentName, dlReaderVersion); - RecordReader recordReader = - dlUpdateEventsFactory.newRecordReaderForTimestamp(topic, updateEventStreamOffsetTimestamp); - updateEventsMultiReader.addRecordReader(recordReader, topic); - updateEventReaders.put(segmentInfo.getTimeSliceID(), - new TransformingRecordReader<>(recordReader, this::deserializeTVE)); - } - - @Override - public void stopAll() { - if (documentReader != null) { - documentReader.close(); - } - if (updateEventsReader != null) { - updateEventsReader.close(); - } - try { - dlFactory.close(); - } catch (IOException e) { - LOG.error("Exception while closing DL factory", e); - } - } - - @Override - public void completeSegmentDocs(SegmentInfo segmentInfo) { - if (documentReader != null) { - documentReader.close(); - documentReader = null; - } - } - - @Override - public void stopSegmentUpdates(SegmentInfo segmentInfo) { - if (updateEventsMultiReader != null) { - updateEventsMultiReader.removeStream( - SegmentDLUtil.getDLTopicForUpdateEvents(segmentInfo.getSegmentName(), dlReaderVersion)); - updateEventReaders.remove(segmentInfo.getTimeSliceID()); - } - } - - @Override - public RecordReader newDocumentReader(SegmentInfo segmentInfo) throws IOException { - String topic = SegmentDLUtil.getDLTopicForTweets(segmentInfo.getSegmentName(), - EarlybirdConfig.getPenguinVersion(), dlReaderVersion); - final long timeSliceId = segmentInfo.getTimeSliceID(); - final DocumentFactory docFactory = indexConfig.createDocumentFactory(); - - // Create the underlying DLRecordReader wrapped with the tweet reader stats. - RecordReader dlReader = new ReaderWithStatsFactory( - new DLTimestampedReaderFactory( - dlFactory, - clock, - documentReadFreshnessThreshold), - STATUS_DL_READ_STATS) - .newRecordReader(topic); - - // Create the wrapped reader which transforms serialized byte[] to TweetDocument. - return new TransformingRecordReader<>( - dlReader, - new Function() { - @Override - public TweetDocument apply(byte[] input) { - ThriftIndexingEvent event = new ThriftIndexingEvent(); - try { - ThriftUtils.fromCompactBinaryFormat(input, event); - } catch (TException e) { - LOG.error("Could not deserialize status document", e); - STATUS_SKIPPED_DUE_TO_FAILED_DESERIALIZATION_COUNTER.increment(); - return null; - } - - Preconditions.checkNotNull(event.getDocument()); - return new TweetDocument( - docFactory.getStatusId(event), - timeSliceId, - EarlybirdThriftDocumentUtil.getCreatedAtMs(event.getDocument()), - docFactory.newDocument(event)); - } - }); - } - - @Override - public RecordReader getDocumentReader() { - return documentReader; - } - - @Override - public RecordReader getUpdateEventsReader() { - return updateEventsReader; - } - - @Override - public RecordReader getUpdateEventsReaderForSegment( - SegmentInfo segmentInfo) { - return updateEventReaders.get(segmentInfo.getTimeSliceID()); - } - - @Override - public Optional getUpdateEventsStreamOffsetForSegment(SegmentInfo segmentInfo) { - String topic = - SegmentDLUtil.getDLTopicForUpdateEvents(segmentInfo.getSegmentName(), dlReaderVersion); - return updateEventsMultiReader.getUnderlyingOffsetForSegmentWithTopic(topic); - } - - @Override - public boolean allCaughtUp() { - return ((getDocumentReader() == null) || getDocumentReader().isCaughtUp()) - && ((getUpdateEventsReader() == null) || getUpdateEventsReader().isCaughtUp()); - } -} diff --git a/src/java/com/twitter/search/earlybird/segment/EmptySegmentDataReaderSet.docx b/src/java/com/twitter/search/earlybird/segment/EmptySegmentDataReaderSet.docx new file mode 100644 index 000000000..95d57e076 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/segment/EmptySegmentDataReaderSet.docx differ diff --git a/src/java/com/twitter/search/earlybird/segment/EmptySegmentDataReaderSet.java b/src/java/com/twitter/search/earlybird/segment/EmptySegmentDataReaderSet.java deleted file mode 100644 index 0d6ad55b5..000000000 --- a/src/java/com/twitter/search/earlybird/segment/EmptySegmentDataReaderSet.java +++ /dev/null @@ -1,72 +0,0 @@ -package com.twitter.search.earlybird.segment; - -import java.util.Optional; - -import com.twitter.search.common.indexing.thriftjava.ThriftVersionedEvents; -import com.twitter.search.common.util.io.EmptyRecordReader; -import com.twitter.search.common.util.io.recordreader.RecordReader; -import com.twitter.search.earlybird.document.TweetDocument; -import com.twitter.search.earlybird.partition.SegmentInfo; - -/** - * A SegmentDataReaderSet that returns no data. Uses a DocumentReader that is - * always caught up, but never gets exhausted. - * Can be used for bringing up an earlybird against a static set of segments, - * and will not incorporate any new updates. - */ -public class EmptySegmentDataReaderSet implements SegmentDataReaderSet { - public static final EmptySegmentDataReaderSet INSTANCE = new EmptySegmentDataReaderSet(); - - @Override - public void attachDocumentReaders(SegmentInfo segmentInfo) { - } - - @Override - public void attachUpdateReaders(SegmentInfo segmentInfo) { - } - - @Override - public void completeSegmentDocs(SegmentInfo segmentInfo) { - } - - @Override - public void stopSegmentUpdates(SegmentInfo segmentInfo) { - } - - @Override - public void stopAll() { - } - - @Override - public boolean allCaughtUp() { - // ALWAYS CAUGHT UP - return true; - } - - @Override - public RecordReader newDocumentReader(SegmentInfo segmentInfo) - throws Exception { - return null; - } - - @Override - public RecordReader getDocumentReader() { - return new EmptyRecordReader<>(); - } - - @Override - public RecordReader getUpdateEventsReader() { - return null; - } - - @Override - public RecordReader getUpdateEventsReaderForSegment( - SegmentInfo segmentInfo) { - return null; - } - - @Override - public Optional getUpdateEventsStreamOffsetForSegment(SegmentInfo segmentInfo) { - return Optional.of(0L); - } -} diff --git a/src/java/com/twitter/search/earlybird/segment/SegmentDataProvider.docx b/src/java/com/twitter/search/earlybird/segment/SegmentDataProvider.docx new file mode 100644 index 000000000..38b775538 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/segment/SegmentDataProvider.docx differ diff --git a/src/java/com/twitter/search/earlybird/segment/SegmentDataProvider.java b/src/java/com/twitter/search/earlybird/segment/SegmentDataProvider.java deleted file mode 100644 index 502bbe1f5..000000000 --- a/src/java/com/twitter/search/earlybird/segment/SegmentDataProvider.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.twitter.search.earlybird.segment; - -/** - * SegmentDataProvider provides information about available segments for indexing. This interface - * abstracts away the actual source of the segment data. It might be a MySQL database, a mock - * object, or a directory of flat files. It also provides access to the segmentInfoMap itself, which - * contains information about the indexing state of Segments. - */ -public interface SegmentDataProvider extends SegmentProvider { - /** - * Returns the set of segment data record readers. - */ - SegmentDataReaderSet getSegmentDataReaderSet(); -} diff --git a/src/java/com/twitter/search/earlybird/segment/SegmentDataReaderSet.docx b/src/java/com/twitter/search/earlybird/segment/SegmentDataReaderSet.docx new file mode 100644 index 000000000..3ad1aa985 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/segment/SegmentDataReaderSet.docx differ diff --git a/src/java/com/twitter/search/earlybird/segment/SegmentDataReaderSet.java b/src/java/com/twitter/search/earlybird/segment/SegmentDataReaderSet.java deleted file mode 100644 index 84b18c34e..000000000 --- a/src/java/com/twitter/search/earlybird/segment/SegmentDataReaderSet.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.twitter.search.earlybird.segment; - -import java.io.IOException; -import java.util.Optional; - -import com.twitter.search.common.indexing.thriftjava.ThriftVersionedEvents; -import com.twitter.search.common.util.io.recordreader.RecordReader; -import com.twitter.search.earlybird.document.TweetDocument; -import com.twitter.search.earlybird.partition.SegmentInfo; - -/** - * SegmentDataReaderSet provides an interface to create and manage the various - * RecordReaders used to index Earlybird segments. - */ -public interface SegmentDataReaderSet { - /** - * Instruct the document RecordReaders (i.e. document, geo, ... as appropriate) to read from this - * segment. - */ - void attachDocumentReaders(SegmentInfo segmentInfo) throws IOException; - - /** - * Instruct the reader set to add segment to non-document RecordReaders (deletes, features, etc.) - */ - void attachUpdateReaders(SegmentInfo segmentInfo) throws IOException; - - /** - * Mark a segment as "complete", denoting that we are done reading document records from it. - * - * This instructs the reader set to stop reading documents from the segment (if it hasn't - * already), although for now geo-document records can still be read. Updates RecordReaders - * (deletes, etc.) may continue to read entries for the segment. - */ - void completeSegmentDocs(SegmentInfo segmentInfo); - - /** - * This instructs the reader set to stop reading updates for the Segment. It - * should remove the segment from all non-document RecordReaders (deletes, etc.) - */ - void stopSegmentUpdates(SegmentInfo segmentInfo); - - /** - * Stops all RecordReaders and closes all resources. - */ - void stopAll(); - - /** - * Returns true if all RecordReaders are 'caught up' with the data sources they - * are reading from. This might mean that the end of a file has been reached, - * or that we are waiting/polling for new records from an append-only database. - */ - boolean allCaughtUp(); - - /** - * Create a new DocumentReader for the given segment that is not managed by this set. - */ - RecordReader newDocumentReader(SegmentInfo segmentInfo) throws Exception; - - /** - * Returns the document reader for the current segment. - */ - RecordReader getDocumentReader(); - - /** - * Returns a combined update events reader for all segments. - */ - RecordReader getUpdateEventsReader(); - - /** - * Returns the update events reader for the given segment. - */ - RecordReader getUpdateEventsReaderForSegment(SegmentInfo segmentInfo); - - /** - * Returns the offset in the update events stream for the given segment that this earlybird should - * start indexing from. - */ - Optional getUpdateEventsStreamOffsetForSegment(SegmentInfo segmentInfo); -} diff --git a/src/java/com/twitter/search/earlybird/segment/SegmentProvider.docx b/src/java/com/twitter/search/earlybird/segment/SegmentProvider.docx new file mode 100644 index 000000000..1dc2b3ea1 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/segment/SegmentProvider.docx differ diff --git a/src/java/com/twitter/search/earlybird/segment/SegmentProvider.java b/src/java/com/twitter/search/earlybird/segment/SegmentProvider.java deleted file mode 100644 index 7b8b94554..000000000 --- a/src/java/com/twitter/search/earlybird/segment/SegmentProvider.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.twitter.search.earlybird.segment; - -import java.io.IOException; -import java.util.List; - -import com.twitter.search.common.partitioning.base.Segment; - -public interface SegmentProvider { - /** - * Returns a *new* sorted list of all available segments on disk / db / hdfs / etc. - */ - List newSegmentList() throws IOException; -} diff --git a/src/java/com/twitter/search/earlybird/stats/EarlybirdRPCStats.docx b/src/java/com/twitter/search/earlybird/stats/EarlybirdRPCStats.docx new file mode 100644 index 000000000..a16ef2ae0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/stats/EarlybirdRPCStats.docx differ diff --git a/src/java/com/twitter/search/earlybird/stats/EarlybirdRPCStats.java b/src/java/com/twitter/search/earlybird/stats/EarlybirdRPCStats.java deleted file mode 100644 index b8f6a67ec..000000000 --- a/src/java/com/twitter/search/earlybird/stats/EarlybirdRPCStats.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.search.earlybird.stats; - -import java.util.concurrent.TimeUnit; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchRateCounter; -import com.twitter.search.common.metrics.SearchRequestStats; - -/** - * SearchRequestStats with earlybird-specific additional stats. - */ -public final class EarlybirdRPCStats { - private final SearchRequestStats requestStats; - // Number of queries that were terminated early. - private final SearchCounter earlyTerminatedRequests; - - // We do not count client error in the response error rate, but track it separately. - private final SearchRateCounter responseClientErrors; - - public EarlybirdRPCStats(String name) { - requestStats = SearchRequestStats.export(name, TimeUnit.MICROSECONDS, true, true); - earlyTerminatedRequests = SearchCounter.export(name + "_early_terminated"); - responseClientErrors = SearchRateCounter.export(name + "_client_error"); - } - - public long getRequestRate() { - return (long) (double) requestStats.getRequestRate().read(); - } - - public long getAverageLatency() { - return (long) (double) requestStats.getTimerStats().read(); - } - - /** - * Records a completed earlybird request. - * @param latencyUs how long the request took to complete, in microseconds. - * @param resultsCount how many results were returned. - * @param success whether the request was successful or not. - * @param earlyTerminated whether the request terminated early or not. - * @param clientError whether the request failure is caused by client errors - */ - public void requestComplete(long latencyUs, long resultsCount, boolean success, - boolean earlyTerminated, boolean clientError) { - // We treat client errors as successes for top-line metrics to prevent bad client requests (like - // malformed queries) from dropping our success rate and generating alerts. - requestStats.requestComplete(latencyUs, resultsCount, success || clientError); - - if (earlyTerminated) { - earlyTerminatedRequests.increment(); - } - if (clientError) { - responseClientErrors.increment(); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/stats/EarlybirdSearcherStats.docx b/src/java/com/twitter/search/earlybird/stats/EarlybirdSearcherStats.docx new file mode 100644 index 000000000..a26252b16 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/stats/EarlybirdSearcherStats.docx differ diff --git a/src/java/com/twitter/search/earlybird/stats/EarlybirdSearcherStats.java b/src/java/com/twitter/search/earlybird/stats/EarlybirdSearcherStats.java deleted file mode 100644 index dcaedafdf..000000000 --- a/src/java/com/twitter/search/earlybird/stats/EarlybirdSearcherStats.java +++ /dev/null @@ -1,213 +0,0 @@ -package com.twitter.search.earlybird.stats; - -import java.util.EnumMap; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; - -import com.google.common.base.Preconditions; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchMetricTimerOptions; -import com.twitter.search.common.metrics.SearchStatsReceiver; -import com.twitter.search.common.metrics.SearchTimer; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.common.ranking.thriftjava.ThriftRankingParams; -import com.twitter.search.common.ranking.thriftjava.ThriftScoringFunctionType; -import com.twitter.search.earlybird.EarlybirdSearcher; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.ThriftSearchRelevanceOptions; - -/** - * Manages counter and timer stats for EarlybirdSearcher. - */ -public class EarlybirdSearcherStats { - private static final TimeUnit TIME_UNIT = TimeUnit.MICROSECONDS; - - private final SearchStatsReceiver earlybirdServerStatsReceiver; - - public final SearchCounter thriftQueryWithSerializedQuery; - public final SearchCounter thriftQueryWithLuceneQuery; - public final SearchCounter thriftQueryWithoutTextQuery; - public final SearchCounter addedFilterBadUserRep; - public final SearchCounter addedFilterFromUserIds; - public final SearchCounter addedFilterTweetIds; - public final SearchCounter unsetFiltersForSocialFilterTypeQuery; - public final SearchCounter querySpecificSignalMapTotalSize; - public final SearchCounter querySpecificSignalQueriesUsed; - public final SearchCounter querySpecificSignalQueriesErased; - public final SearchCounter authorSpecificSignalMapTotalSize; - public final SearchCounter authorSpecificSignalQueriesUsed; - public final SearchCounter authorSpecificSignalQueriesErased; - public final SearchCounter nullcastTweetsForceExcluded; - public final SearchCounter nullcastUnexpectedResults; - public final SearchCounter nullcastUnexpectedQueries; - public final SearchCounter relevanceAntiGamingFilterUsed; - public final SearchCounter relevanceAntiGamingFilterNotRequested; - public final SearchCounter relevanceAntiGamingFilterSpecifiedTweetsAndFromUserIds; - public final SearchCounter relevanceAntiGamingFilterSpecifiedTweets; - public final SearchCounter relevanceAntiGamingFilterSpecifiedFromUserIds; - public final SearchCounter numCollectorAdjustedMinSearchedStatusID; - - public final Map numRequestsWithBlankQuery; - private final Map latencyByScoringFunctionType; - private final Map> latencyByScoringFunctionTypeAndClient; - private final Map latencyByTensorflowModel; - - public EarlybirdSearcherStats(SearchStatsReceiver earlybirdServerStatsReceiver) { - this.earlybirdServerStatsReceiver = earlybirdServerStatsReceiver; - - this.thriftQueryWithLuceneQuery = - earlybirdServerStatsReceiver.getCounter("thrift_query_with_lucene_query"); - this.thriftQueryWithSerializedQuery = - earlybirdServerStatsReceiver.getCounter("thrift_query_with_serialized_query"); - this.thriftQueryWithoutTextQuery = - earlybirdServerStatsReceiver.getCounter("thrift_query_without_text_query"); - - this.addedFilterBadUserRep = - earlybirdServerStatsReceiver.getCounter("added_filter_bad_user_rep"); - this.addedFilterFromUserIds = - earlybirdServerStatsReceiver.getCounter("added_filter_from_user_ids"); - this.addedFilterTweetIds = - earlybirdServerStatsReceiver.getCounter("added_filter_tweet_ids"); - - this.unsetFiltersForSocialFilterTypeQuery = - earlybirdServerStatsReceiver.getCounter("unset_filters_for_social_filter_type_query"); - this.querySpecificSignalMapTotalSize = - earlybirdServerStatsReceiver.getCounter("query_specific_signal_map_total_size"); - this.querySpecificSignalQueriesUsed = - earlybirdServerStatsReceiver.getCounter("query_specific_signal_queries_used"); - this.querySpecificSignalQueriesErased = - earlybirdServerStatsReceiver.getCounter("query_specific_signal_queries_erased"); - this.authorSpecificSignalMapTotalSize = - earlybirdServerStatsReceiver.getCounter("author_specific_signal_map_total_size"); - this.authorSpecificSignalQueriesUsed = - earlybirdServerStatsReceiver.getCounter("author_specific_signal_queries_used"); - this.authorSpecificSignalQueriesErased = - earlybirdServerStatsReceiver.getCounter("author_specific_signal_queries_erased"); - this.nullcastTweetsForceExcluded = - earlybirdServerStatsReceiver.getCounter("force_excluded_nullcast_result_count"); - this.nullcastUnexpectedResults = - earlybirdServerStatsReceiver.getCounter("unexpected_nullcast_result_count"); - this.nullcastUnexpectedQueries = - earlybirdServerStatsReceiver.getCounter("queries_with_unexpected_nullcast_results"); - this.numCollectorAdjustedMinSearchedStatusID = - earlybirdServerStatsReceiver.getCounter("collector_adjusted_min_searched_status_id"); - - this.relevanceAntiGamingFilterUsed = earlybirdServerStatsReceiver - .getCounter("relevance_anti_gaming_filter_used"); - this.relevanceAntiGamingFilterNotRequested = earlybirdServerStatsReceiver - .getCounter("relevance_anti_gaming_filter_not_requested"); - this.relevanceAntiGamingFilterSpecifiedTweetsAndFromUserIds = earlybirdServerStatsReceiver - .getCounter("relevance_anti_gaming_filter_specified_tweets_and_from_user_ids"); - this.relevanceAntiGamingFilterSpecifiedTweets = earlybirdServerStatsReceiver - .getCounter("relevance_anti_gaming_filter_specified_tweets"); - this.relevanceAntiGamingFilterSpecifiedFromUserIds = earlybirdServerStatsReceiver - .getCounter("relevance_anti_gaming_filter_specified_from_user_ids"); - - this.latencyByScoringFunctionType = new EnumMap<>(ThriftScoringFunctionType.class); - this.latencyByScoringFunctionTypeAndClient = new EnumMap<>(ThriftScoringFunctionType.class); - this.latencyByTensorflowModel = new ConcurrentHashMap<>(); - - for (ThriftScoringFunctionType type : ThriftScoringFunctionType.values()) { - this.latencyByScoringFunctionType.put(type, getTimerStatsByName(getStatsNameByType(type))); - this.latencyByScoringFunctionTypeAndClient.put(type, new ConcurrentHashMap<>()); - } - - this.numRequestsWithBlankQuery = new EnumMap<>(EarlybirdSearcher.QueryMode.class); - - for (EarlybirdSearcher.QueryMode queryMode : EarlybirdSearcher.QueryMode.values()) { - String counterName = - String.format("num_requests_with_blank_query_%s", queryMode.name().toLowerCase()); - - this.numRequestsWithBlankQuery.put( - queryMode, earlybirdServerStatsReceiver.getCounter(counterName)); - } - } - - /** - * Records the latency for a request for the applicable stats. - * @param timer A stopped timer that timed the request. - * @param request The request that was timed. - */ - public void recordRelevanceStats(SearchTimer timer, EarlybirdRequest request) { - Preconditions.checkNotNull(timer); - Preconditions.checkNotNull(request); - Preconditions.checkArgument(!timer.isRunning()); - - ThriftSearchRelevanceOptions relevanceOptions = request.getSearchQuery().getRelevanceOptions(); - - // Only record ranking searches with a set type. - if (!relevanceOptions.isSetRankingParams() - || !relevanceOptions.getRankingParams().isSetType()) { - return; - } - - ThriftRankingParams rankingParams = relevanceOptions.getRankingParams(); - ThriftScoringFunctionType scoringFunctionType = rankingParams.getType(); - - latencyByScoringFunctionType.get(scoringFunctionType).stoppedTimerIncrement(timer); - - if (request.getClientId() != null) { - getTimerStatsByClient(scoringFunctionType, request.getClientId()) - .stoppedTimerIncrement(timer); - } - - if (scoringFunctionType != ThriftScoringFunctionType.TENSORFLOW_BASED) { - return; - } - - String modelName = rankingParams.getSelectedTensorflowModel(); - - if (modelName != null) { - getTimerStatsByTensorflowModel(modelName).stoppedTimerIncrement(timer); - } - } - - /** - * Creates a search timer with options specified by TweetsEarlybirdSearcherStats. - * @return A new SearchTimer. - */ - public SearchTimer createTimer() { - return new SearchTimer(new SearchMetricTimerOptions.Builder() - .withTimeUnit(TIME_UNIT) - .build()); - } - - private SearchTimerStats getTimerStatsByClient( - ThriftScoringFunctionType type, - String clientId) { - Map latencyByClient = latencyByScoringFunctionTypeAndClient.get(type); - - return latencyByClient.computeIfAbsent(clientId, - cid -> getTimerStatsByName(getStatsNameByClientAndType(type, cid))); - } - - private SearchTimerStats getTimerStatsByTensorflowModel(String modelName) { - return latencyByTensorflowModel.computeIfAbsent(modelName, - mn -> getTimerStatsByName(getStatsNameByTensorflowModel(mn))); - } - - private SearchTimerStats getTimerStatsByName(String name) { - return earlybirdServerStatsReceiver.getTimerStats( - name, TIME_UNIT, false, true, false); - } - - public static String getStatsNameByType(ThriftScoringFunctionType type) { - return String.format( - "search_relevance_scoring_function_%s_requests", type.name().toLowerCase()); - } - - public static String getStatsNameByClientAndType( - ThriftScoringFunctionType type, - String clientId) { - return String.format("%s_%s", ClientIdUtil.formatClientId(clientId), getStatsNameByType(type)); - } - - public static String getStatsNameByTensorflowModel(String modelName) { - return String.format( - "model_%s_%s", modelName, getStatsNameByType(ThriftScoringFunctionType.TENSORFLOW_BASED)); - } -} diff --git a/src/java/com/twitter/search/earlybird/stats/SegmentSyncStats.docx b/src/java/com/twitter/search/earlybird/stats/SegmentSyncStats.docx new file mode 100644 index 000000000..2e276a5c8 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/stats/SegmentSyncStats.docx differ diff --git a/src/java/com/twitter/search/earlybird/stats/SegmentSyncStats.java b/src/java/com/twitter/search/earlybird/stats/SegmentSyncStats.java deleted file mode 100644 index e16b35f6e..000000000 --- a/src/java/com/twitter/search/earlybird/stats/SegmentSyncStats.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.twitter.search.earlybird.stats; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.Timer; - -public class SegmentSyncStats { - private static final String CPU_TOTAL = "_cpu_total_"; - private static final String CPU_USER = "_cpu_user_mode_"; - private static final String CPU_SYS = "_cpu_system_mode_"; - - private final SearchCounter segmentSyncLatency; - private final SearchCounter segmentSyncLatencyCpuTotal; - private final SearchCounter segmentSyncLatencyCpuUserMode; - private final SearchCounter segmentSyncLatencyCpuSystemMode; - private final SearchCounter segmentSyncCount; - private final SearchCounter segmentErrorCount; - - private SegmentSyncStats(SearchCounter segmentSyncLatency, - SearchCounter segmentSyncLatencyCpuTotal, - SearchCounter segmentSyncLatencyCpuUserMode, - SearchCounter segmentSyncLatencyCpuSystemMode, - SearchCounter segmentSyncCount, - SearchCounter segmentErrorCount) { - this.segmentSyncLatency = segmentSyncLatency; - this.segmentSyncLatencyCpuTotal = segmentSyncLatencyCpuTotal; - this.segmentSyncLatencyCpuUserMode = segmentSyncLatencyCpuUserMode; - this.segmentSyncLatencyCpuSystemMode = segmentSyncLatencyCpuSystemMode; - this.segmentSyncCount = segmentSyncCount; - this.segmentErrorCount = segmentErrorCount; - } - - /** - * Creates a new set of stats for the given segment sync action. - * @param action the name to be used for the sync stats. - */ - public SegmentSyncStats(String action) { - this(SearchCounter.export("segment_" + action + "_latency_ms"), - SearchCounter.export("segment_" + action + "_latency" + CPU_TOTAL + "ms"), - SearchCounter.export("segment_" + action + "_latency" + CPU_USER + "ms"), - SearchCounter.export("segment_" + action + "_latency" + CPU_SYS + "ms"), - SearchCounter.export("segment_" + action + "_count"), - SearchCounter.export("segment_" + action + "_error_count")); - } - - /** - * Records a completed action using the specified timer. - */ - public void actionComplete(Timer timer) { - segmentSyncCount.increment(); - segmentSyncLatency.add(timer.getElapsed()); - segmentSyncLatencyCpuTotal.add(timer.getElapsedCpuTotal()); - segmentSyncLatencyCpuUserMode.add(timer.getElapsedCpuUserMode()); - segmentSyncLatencyCpuSystemMode.add(timer.getElapsedCpuSystemMode()); - } - - public void recordError() { - segmentErrorCount.increment(); - } -} diff --git a/src/java/com/twitter/search/earlybird/tools/EarlybirdThriftRequestDeserializerUtil.docx b/src/java/com/twitter/search/earlybird/tools/EarlybirdThriftRequestDeserializerUtil.docx new file mode 100644 index 000000000..a97d84987 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/tools/EarlybirdThriftRequestDeserializerUtil.docx differ diff --git a/src/java/com/twitter/search/earlybird/tools/EarlybirdThriftRequestDeserializerUtil.java b/src/java/com/twitter/search/earlybird/tools/EarlybirdThriftRequestDeserializerUtil.java deleted file mode 100644 index c6dd20c9d..000000000 --- a/src/java/com/twitter/search/earlybird/tools/EarlybirdThriftRequestDeserializerUtil.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.twitter.search.earlybird.tools; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.file.FileSystems; -import java.nio.file.Files; -import java.nio.file.Path; - -import com.google.common.base.Preconditions; - -import org.apache.commons.codec.binary.Base64; -import org.apache.thrift.TDeserializer; -import org.apache.thrift.TException; - -import com.twitter.search.earlybird.thrift.EarlybirdRequest; - -/** - * - * This tool deserializes the collected thrift requests into human readable format. - * - * Takes zero or one parameter: path to the thrift request log file. - * - * To run: Launch main from IntelliJ / Eclipse. - */ -public final class EarlybirdThriftRequestDeserializerUtil { - private static final String DEFAULT_LOG_FILE_LOCATION = "/tmp/eb_req.B64"; - // Not threadsafe. Single thread main(). - private static final Base64 B64 = new Base64(0); - private static final TDeserializer DESERIALIZER = new TDeserializer(); - - private EarlybirdThriftRequestDeserializerUtil() { - } - - /** - * Runs the EarlybirdThriftRequestDeserializerUtil tool with the given command-line arguments. - */ - public static void main(String[] args) throws IOException { - Path logFile = null; - if (args.length == 1) { - logFile = FileSystems.getDefault().getPath(args[0]); - } else if (args.length == 0) { - logFile = FileSystems.getDefault().getPath(DEFAULT_LOG_FILE_LOCATION); - } else { - System.err.println("Usage: takes zero or one parameter (log file path). " - + "If no log file is specified, " + DEFAULT_LOG_FILE_LOCATION + " is used."); - //CHECKSTYLE:OFF RegexpSinglelineJava - System.exit(-1); - //CHECKSTYLE:ON RegexpSinglelineJava - } - Preconditions.checkState(logFile.toFile().exists()); - - BufferedReader reader = Files.newBufferedReader(logFile, Charset.defaultCharset()); - try { - String line; - while ((line = reader.readLine()) != null) { - EarlybirdRequest ebRequest = deserializeEBRequest(line); - if (ebRequest != null) { - System.out.println(ebRequest); - } - } - } finally { - reader.close(); - } - } - - private static EarlybirdRequest deserializeEBRequest(String line) { - EarlybirdRequest ebRequest = new EarlybirdRequest(); - byte[] bytes = B64.decode(line); - try { - DESERIALIZER.deserialize(ebRequest, bytes); - } catch (TException e) { - System.err.println("Error deserializing thrift."); - } - return ebRequest; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/ActionLogger.docx b/src/java/com/twitter/search/earlybird/util/ActionLogger.docx new file mode 100644 index 000000000..b6e6ee8af Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ActionLogger.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ActionLogger.java b/src/java/com/twitter/search/earlybird/util/ActionLogger.java deleted file mode 100644 index cc21c7956..000000000 --- a/src/java/com/twitter/search/earlybird/util/ActionLogger.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.concurrent.Callable; - -import com.google.common.base.Stopwatch; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public final class ActionLogger { - private static final Logger LOG = LoggerFactory.getLogger(ActionLogger.class); - - private ActionLogger() { - } - - /** - * Run a function, logging a message at the start and end, and the time it took. - */ - public static T call(String message, Callable fn) throws Exception { - LOG.info("Action starting: '{}'.", message); - Stopwatch stopwatch = Stopwatch.createStarted(); - try { - return fn.call(); - } catch (Throwable e) { - LOG.error("Action failed: '{}'.", message, e); - throw e; - } finally { - LOG.info("Action finished in {} '{}'.", stopwatch, message); - } - } - - /** - * Run a function, logging a message at the start and end, and the time it took. - */ - public static void run(String message, CheckedRunnable fn) throws Exception { - call(message, () -> { - fn.run(); - return null; - }); - } - - @FunctionalInterface - public interface CheckedRunnable { - /** - * A nullary function that throws checked exceptions. - */ - void run() throws Exception; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdAction.docx b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdAction.docx new file mode 100644 index 000000000..d877b2d1c Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdAction.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdAction.java b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdAction.java deleted file mode 100644 index ede199588..000000000 --- a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdAction.java +++ /dev/null @@ -1,409 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.Optional; -import java.util.Random; -import java.util.concurrent.atomic.AtomicBoolean; -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Stopwatch; - -import org.apache.zookeeper.KeeperException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.common.base.ExceptionalFunction; -import com.twitter.common.quantity.Amount; -import com.twitter.common.quantity.Time; -import com.twitter.common.zookeeper.ServerSet; -import com.twitter.common.zookeeper.ZooKeeperClient; -import com.twitter.search.common.config.Config; -import com.twitter.search.common.database.DatabaseConfig; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchCustomGauge; -import com.twitter.search.common.util.zktrylock.TryLock; -import com.twitter.search.common.util.zktrylock.ZooKeeperTryLockFactory; -import com.twitter.search.earlybird.ServerSetMember; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.common.config.EarlybirdProperty; -import com.twitter.search.earlybird.exception.AlreadyInServerSetUpdateException; -import com.twitter.search.earlybird.exception.EarlybirdException; -import com.twitter.search.earlybird.exception.CriticalExceptionHandler; -import com.twitter.search.earlybird.exception.NotInServerSetUpdateException; -import com.twitter.search.earlybird.partition.DynamicPartitionConfig; -import com.twitter.search.earlybird.partition.PartitionConfig; -import com.twitter.search.earlybird.partition.SegmentSyncConfig; - -/** - * Utility class for executing tasks on Earlybirds that need to be coordinated across replicas - * on the same hash partition. - * Can be used for things like coordinating optimization on the same timeslice. - * When enabled, a try-lock will be taken out in zookeeper while the task is performed. - * The action will attempt to leave the partition's server set. If the attempt fails, the action - * is aborted. - */ -public class CoordinatedEarlybirdAction implements CoordinatedEarlybirdActionInterface { - private static final Logger LOG = LoggerFactory.getLogger(CoordinatedEarlybirdAction.class); - - private static final Boolean COORDINATED_ACTION_FLAG = Boolean.TRUE; - private static final Boolean NOT_COORDINATED_ACTION_FLAG = Boolean.FALSE; - - private final String actionName; - private final DynamicPartitionConfig dynamicPartitionConfig; - @Nullable private final ServerSetMember serverSetMember; - private final ZooKeeperTryLockFactory zooKeeperTryLockFactory; - - // Whether this action should be coordinated through zookeeper in the first place (could be - // config'ed off). - // If the action is coordinated, this earlybird will leave its server set when performing the - // coordinated action. - private final AtomicBoolean shouldSynchronize; - // Whether this action should ensure that there are enough replicas in the serverset (defined by - // maxAllowedReplicasNotInServerSet) before leaving the serverset. - private final boolean checkNumReplicasInServerSet; - // If this many (or more) servers have left the partition, we cannot perform a coordinated action - private final int maxAllowedReplicasNotInServerSet; - // How long to lock out all other replicas in this hash partition for. - // Should be some small multiple of how long the action is expected to take, to allow for longer - // running cases. - private final long zkLockExpirationTimeMinutes; - // Prefix for the zookeeper lock used when coordinating daily updates. - // Full name should include the hash partition number. - private final String zkLockNamePrefix; - // If we're unable to re-join this earlybird's server set during coordinated updates, - // how many times to retry. - private final int joinServerSetRetries; - // How long to sleep between retries if unable to job back into server set. - private final int joinServerSetRetrySleepMillis; - // How long to sleep between leaving the serverset and executing the action - private final int sleepAfterLeaveServerSetMillis; - - // How many times a this action was called within a lock block. - private final SearchCounter numCoordinatedFunctionCalls; - private final SearchCounter numCoordinatedLeaveServersetCalls; - - private final CriticalExceptionHandler criticalExceptionHandler; - private final SegmentSyncConfig segmentSyncConfig; - - /** - * Create a CoordinatedEarlybirdAction. - * - * @param actionName the name to be used for logging and the prefix for config options. - * @param dynamicPartitionConfig maintains the current partitioning configuration for this - * earlybird. Used mainly to determine the hash partition of this earlybird. - * @param serverSetMember the server that this action is running on. To be used to leaving and - * rejoining the server's server set. - */ - public CoordinatedEarlybirdAction( - ZooKeeperTryLockFactory zooKeeperTryLockFactory, - String actionName, - DynamicPartitionConfig dynamicPartitionConfig, - @Nullable ServerSetMember serverSetMember, - CriticalExceptionHandler criticalExceptionHandler, - SegmentSyncConfig segmentSyncConfig) { - this.actionName = actionName; - this.dynamicPartitionConfig = dynamicPartitionConfig; - this.serverSetMember = serverSetMember; - this.criticalExceptionHandler = criticalExceptionHandler; - this.segmentSyncConfig = segmentSyncConfig; - this.zooKeeperTryLockFactory = zooKeeperTryLockFactory; - if (serverSetMember == null) { - Preconditions.checkState(Config.environmentIsTest(), - "Should only have a null server in tests"); - } - - this.shouldSynchronize = new AtomicBoolean( - EarlybirdConfig.getBool(actionName + "_should_synchronize", false)); - - // Export whether or not synchronization is enabled as a stat - SearchCustomGauge.export( - actionName + "_should_synchronize", () -> shouldSynchronize.get() ? 1 : 0); - - this.checkNumReplicasInServerSet = EarlybirdProperty.CHECK_NUM_REPLICAS_IN_SERVER_SET.get(); - - int numReplicas = - dynamicPartitionConfig.getCurrentPartitionConfig().getNumReplicasInHashPartition(); - this.maxAllowedReplicasNotInServerSet = - EarlybirdProperty.MAX_ALLOWED_REPLICAS_NOT_IN_SERVER_SET.get(numReplicas); - - this.zkLockExpirationTimeMinutes = - EarlybirdConfig.getLong(actionName + "_lock_expiration_time_minutes", 60L); - this.zkLockNamePrefix = actionName + "_for_hash_partition_"; - this.joinServerSetRetries = - EarlybirdConfig.getInt(actionName + "_join_server_set_retries", 20); - this.joinServerSetRetrySleepMillis = - EarlybirdConfig.getInt(actionName + "_join_server_retry_sleep_millis", 2000); - this.sleepAfterLeaveServerSetMillis = - EarlybirdConfig.getInt("coordinated_action_sleep_after_leave_server_set_millis", 30000); - - this.numCoordinatedFunctionCalls = SearchCounter.export(actionName + "_num_coordinated_calls"); - this.numCoordinatedLeaveServersetCalls = - SearchCounter.export(actionName + "_num_coordinated_leave_serverset_calls"); - - if (this.checkNumReplicasInServerSet) { - LOG.info( - "Coordinate action config ({}): allowedNotIn: {}, current number of replicas: {}, " - + "synchronization enabled: {}, checkNumReplicasInServerSet enabled: {}", - actionName, - maxAllowedReplicasNotInServerSet, - dynamicPartitionConfig.getCurrentPartitionConfig().getNumReplicasInHashPartition(), - shouldSynchronize, - this.checkNumReplicasInServerSet); - } else { - LOG.info( - "Coordinate action config ({}): synchronization enabled: {}, " - + "checkNumReplicasInServerSet enabled: {}", - actionName, - shouldSynchronize, - this.checkNumReplicasInServerSet); - } - } - - - @Override - public boolean execute( - String description, - ExceptionalFunction function) - throws E, CoordinatedEarlybirdActionLockFailed { - if (this.shouldSynchronize.get()) { - return executeWithCoordination(description, function); - } else { - return function.apply(NOT_COORDINATED_ACTION_FLAG); - } - } - - enum LeaveServerSetResult { - SUCCESS, - FAILURE, - NOT_IN_SERVER_SET, - NO_SERVER_SET_MEMBER - } - - private LeaveServerSetResult leaveServerSet() { - LOG.info("Leaving serving server set for " + actionName); - try { - serverSetMember.leaveServerSet("CoordinatedAction: " + actionName); - return LeaveServerSetResult.SUCCESS; - } catch (ServerSet.UpdateException ex) { - if (ex instanceof NotInServerSetUpdateException) { - LOG.info("No need to leave; already out of server set during: " - + actionName, ex); - return LeaveServerSetResult.NOT_IN_SERVER_SET; - } else { - LOG.warn("Unable to leave server set during: " + actionName, ex); - return LeaveServerSetResult.FAILURE; - } - } - } - - private LeaveServerSetResult maybeLeaveServerSet() { - if (serverSetMember != null) { - if (serverSetMember.isInServerSet()) { - - if (!checkNumReplicasInServerSet) { - return leaveServerSet(); - } else { - PartitionConfig curPartitionConfig = dynamicPartitionConfig.getCurrentPartitionConfig(); - final int minNumServers = - curPartitionConfig.getNumReplicasInHashPartition() - maxAllowedReplicasNotInServerSet; - Optional numServerSetMembers = getNumberOfServerSetMembers(); - LOG.info("Checking number of replicas before leaving server set for " + actionName - + ". Number of members is: " + numServerSetMembers + " minMembers: " + minNumServers); - if (numServerSetMembers.isPresent() && numServerSetMembers.get() > minNumServers) { - return leaveServerSet(); - } else { - LOG.warn("Not leaving server set during: " + actionName); - return LeaveServerSetResult.FAILURE; - } - } - } else { - LOG.info("Not in server set, no need to leave it."); - return LeaveServerSetResult.NOT_IN_SERVER_SET; - } - } - - return LeaveServerSetResult.NO_SERVER_SET_MEMBER; - } - - private boolean executeWithCoordination( - final String description, - final ExceptionalFunction function) - throws E, CoordinatedEarlybirdActionLockFailed { - PartitionConfig curPartitionConfig = dynamicPartitionConfig.getCurrentPartitionConfig(); - TryLock lock = zooKeeperTryLockFactory.createTryLock( - DatabaseConfig.getLocalHostname(), - segmentSyncConfig.getZooKeeperSyncFullPath(), - zkLockNamePrefix - + curPartitionConfig.getIndexingHashPartitionID(), - Amount.of(zkLockExpirationTimeMinutes, Time.MINUTES) - ); - - final AtomicBoolean success = new AtomicBoolean(false); - - boolean gotLock = lock.tryWithLock(() -> { - Stopwatch actionTiming = Stopwatch.createStarted(); - - LeaveServerSetResult leftServerSet = maybeLeaveServerSet(); - if (leftServerSet == LeaveServerSetResult.FAILURE) { - LOG.info("Failed to leave the server set, will not execute action."); - return; - } - - LOG.info("maybeLeaveServerSet returned: {}", leftServerSet); - - // Sleep for a short time to give the server some time to finish requests that it is currently - // executing and allow roots some time to register that this host has left the server set. - // If we didn't do this and the coordinated action included a full GC, then latency and error - // rate at the root layer would spike higher at the time of the GC. SEARCH-35456 - try { - Thread.sleep(sleepAfterLeaveServerSetMillis); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } - - LOG.info(actionName + " synchronization action for " + description); - - try { - numCoordinatedFunctionCalls.increment(); - numCoordinatedLeaveServersetCalls.increment(); - - Boolean successValue = function.apply(COORDINATED_ACTION_FLAG); - success.set(successValue); - } finally { - if (leftServerSet == LeaveServerSetResult.SUCCESS) { - joinServerSet(); - } - LOG.info("{} synchronization action for {} completed after {}, success: {}", - actionName, - description, - actionTiming, - success.get()); - } - }); - - if (!gotLock) { - String errorMsg = actionName + ": Failed to get zk indexing lock for " + description; - LOG.info(errorMsg); - throw new CoordinatedEarlybirdActionLockFailed(errorMsg); - } - return success.get(); - } - - @Override - public void retryActionUntilRan(String description, Runnable action) { - Random random = new Random(System.currentTimeMillis()); - - boolean actionExecuted = false; - int attempts = 0; - while (!actionExecuted) { - try { - attempts++; - actionExecuted = this.execute(description, isCoordinated -> { - action.run(); - return true; - }); - } catch (CoordinatedEarlybirdActionLockFailed ex) { - } - - if (!actionExecuted) { - // Variable sleep amount. The reason for the random sleeps - // is so that across multiple earlybirds this doesn't get - // executed in some sequence that depends on something else - // like maybe deploy times. It might be easier to catch possible - // problems if implicit orderings like this are not introduced. - long msToSleep = (10 + random.nextInt(5)) * 1000L; - try { - Thread.sleep(msToSleep); - } catch (InterruptedException ex) { - LOG.info("Interrupted while trying to execute"); - Thread.currentThread().interrupt(); - } - } else { - LOG.info("Executed {} after {} attempts", actionName, attempts); - } - } - } - - /** - * Gets the current number of servers in this server's server set. - * @return absent Optional if we encountered an exception getting the number of hosts. - */ - private Optional getNumberOfServerSetMembers() { - try { - return serverSetMember != null ? Optional.of(serverSetMember.getNumberOfServerSetMembers()) - : Optional.empty(); - } catch (InterruptedException ex) { - LOG.warn("Action " + actionName + " was interrupted.", ex); - Thread.currentThread().interrupt(); - return Optional.empty(); - } catch (ZooKeeperClient.ZooKeeperConnectionException | KeeperException ex) { - LOG.warn("Exception during " + actionName, ex); - return Optional.empty(); - } - } - - /** - * After a coordinated action, join back this earlybird's server set with retries - * and sleeps in between. - */ - private void joinServerSet() { - Preconditions.checkNotNull(serverSetMember); - - boolean joined = false; - for (int i = 0; i < joinServerSetRetries; i++) { - try { - serverSetMember.joinServerSet("CoordinatedAction: " + actionName); - joined = true; - break; - } catch (AlreadyInServerSetUpdateException ex) { - // Most likely leaving the server set failed - joined = true; - break; - } catch (ServerSet.UpdateException ex) { - LOG.warn("Unable to join server set after " + actionName + " on attempt " - + i, ex); - if (i < (joinServerSetRetries - 1)) { - try { - Thread.sleep(joinServerSetRetrySleepMillis); - } catch (InterruptedException e) { - LOG.warn("Interrupted while waiting to join back server set for: " + actionName); - // Preserve interrupt status. - Thread.currentThread().interrupt(); - break; - } - } - } - } - if (!joined) { - String message = String.format( - "Unable to join server set after %s, setting fatal flag.", - actionName); - EarlybirdException exception = new EarlybirdException(message); - - LOG.error(message, exception); - criticalExceptionHandler.handle(this, exception); - } - } - - - @Override - public boolean setShouldSynchronize(boolean shouldSynchronizeParam) { - boolean oldValue = this.shouldSynchronize.getAndSet(shouldSynchronizeParam); - LOG.info("Updated shouldSynchronize for: " + actionName + " from " + oldValue - + " to " + shouldSynchronizeParam); - return oldValue; - } - - @Override - @VisibleForTesting - public long getNumCoordinatedFunctionCalls() { - return this.numCoordinatedFunctionCalls.get(); - } - - @Override - @VisibleForTesting - public long getNumCoordinatedLeaveServersetCalls() { - return this.numCoordinatedLeaveServersetCalls.get(); - } -} diff --git a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionInterface.docx b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionInterface.docx new file mode 100644 index 000000000..a2b6fed6c Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionInterface.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionInterface.java b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionInterface.java deleted file mode 100644 index 4414a6bc9..000000000 --- a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionInterface.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.twitter.search.earlybird.util; - -import com.google.common.annotations.VisibleForTesting; - -import com.twitter.common.base.ExceptionalFunction; - -public interface CoordinatedEarlybirdActionInterface { - /** - * Executes the provided Function associated with the given segment. - * @param description a name for the action to be exected. - * @param function the function to call in a coordinated manner. - * As input, the function will receive a flag indicating whether or not it is being - * called in a coordinated fashion. true if it is, and false otherwise. - * @return true iff the function was executed, and function.apply() returned true; - * throws CoordinatedEarlybirdActionLockFailed if function is not executed (because lock - * aquisition failed). - */ - boolean execute( - String description, - ExceptionalFunction function) - throws E, CoordinatedEarlybirdActionLockFailed; - - /** - * Set whether this action should be synchronized. - * If not, the action is directly applied. If yes, Earlybirds will coordinate executing the - * action via ZooKeeperTryLocks. - */ - boolean setShouldSynchronize(boolean shouldSynchronizeParam); - - /** - * Number of times this coordinated actions has been executed. - * @return - */ - @VisibleForTesting - long getNumCoordinatedFunctionCalls(); - - /** - * Number of times we have left the serverset. - * @return - */ - @VisibleForTesting - long getNumCoordinatedLeaveServersetCalls(); - - /** - * Retry until we can run an action on a single instance in the serverset. - * @param description Text description of the action. - * @param action A runnable to be ran. - */ - void retryActionUntilRan(String description, Runnable action); -} diff --git a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionLockFailed.docx b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionLockFailed.docx new file mode 100644 index 000000000..126e03c95 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionLockFailed.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionLockFailed.java b/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionLockFailed.java deleted file mode 100644 index 52c00f975..000000000 --- a/src/java/com/twitter/search/earlybird/util/CoordinatedEarlybirdActionLockFailed.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.search.earlybird.util; - -/** - * This class represents that coordindated earlybird action can not acquire the lock so that it - * throws this exception. - */ -public class CoordinatedEarlybirdActionLockFailed extends Exception { - public CoordinatedEarlybirdActionLockFailed(String message) { - super(message); - } -} diff --git a/src/java/com/twitter/search/earlybird/util/EarlybirdDecider.docx b/src/java/com/twitter/search/earlybird/util/EarlybirdDecider.docx new file mode 100644 index 000000000..ee1937fb5 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/EarlybirdDecider.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/EarlybirdDecider.java b/src/java/com/twitter/search/earlybird/util/EarlybirdDecider.java deleted file mode 100644 index 6e2740a7a..000000000 --- a/src/java/com/twitter/search/earlybird/util/EarlybirdDecider.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.twitter.search.earlybird.util; - -import scala.Some; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -import com.twitter.decider.Decider; -import com.twitter.decider.Decider$; -import com.twitter.decider.RandomRecipient$; -import com.twitter.decider.Recipient; -import com.twitter.decider.decisionmaker.MutableDecisionMaker; -import com.twitter.search.common.decider.DeciderUtil; -import com.twitter.search.common.decider.SearchDeciderFactory; -import com.twitter.search.earlybird.common.config.EarlybirdProperty; - -/** - * A Singleton to let any code in Earlybird have the ability to be guarded by a decider key. - * - * EarlybirdDecider is a thin wrapper around the Twitter Decider library to provide global access to a single - * decider configuration. This way any code anywhere can easily be guarded by a Decider key. The initializer requires - * EarlybirdConfig to be initialized already. Defaults to a NullDecider, which causes all requests for keys to return - * false. - */ -public final class EarlybirdDecider { - public static final org.slf4j.Logger LOG = - org.slf4j.LoggerFactory.getLogger(EarlybirdDecider.class); - public static final String DECIDER_CONFIG = "./config/earlybird-decider.yml"; - - private static volatile Decider earlybirdDecider = Decider$.MODULE$.NullDecider(); - private static volatile MutableDecisionMaker mutableDecisionMaker; - - private EarlybirdDecider() { } - - /** - * Initializes the global decider accessor. Requires EarlybirdConfig to be initialized. - * - * @return the new decider interface. - */ - public static Decider initialize() { - return initialize(DECIDER_CONFIG); - } - - /** - * Initializes the global decider accessor. Requires EarlybirdConfig to be initialized. - * - * @param configPath path to the base decider config file. - * @return the new decider interface. - */ - @VisibleForTesting public static Decider initialize(String configPath) { - synchronized (EarlybirdDecider.class) { - Preconditions.checkState(earlybirdDecider == Decider$.MODULE$.NullDecider(), - "EarlybirdDecider can be initialized only once."); - - mutableDecisionMaker = new MutableDecisionMaker(); - - if (EarlybirdProperty.USE_DECIDER_OVERLAY.get(false)) { - String category = EarlybirdProperty.DECIDER_OVERLAY_CONFIG.get(); - earlybirdDecider = - SearchDeciderFactory.createDeciderWithoutRefreshBaseWithOverlay( - configPath, category, mutableDecisionMaker); - LOG.info("EarlybirdDecider set to use the decider overlay " + category); - } else { - earlybirdDecider = - SearchDeciderFactory.createDeciderWithRefreshBaseWithoutOverlay( - configPath, mutableDecisionMaker); - LOG.info("EarlybirdDecider set to only use the base config"); - } - return earlybirdDecider; - } - } - - /** - * Check if feature is available based on randomness - * - * @param feature the feature name to test - * @return true if the feature is available, false otherwise - */ - public static boolean isFeatureAvailable(String feature) { - return isFeatureAvailable(feature, RandomRecipient$.MODULE$); - } - - /** - * Check if the feature is available based on the user - * - * The recipient'd id is hashed and used as the value to compare with the decider percentage. Therefore, the same user - * will always get the same result for a given percentage, and higher percentages should always be a superset of the - * lower percentage users. - * - * RandomRecipient can be used to get a random value for every call. - * - * @param feature the feature name to test - * @param recipient the recipient to base a decision on - * @return true if the feature is available, false otherwise - */ - public static boolean isFeatureAvailable(String feature, Recipient recipient) { - if (earlybirdDecider == Decider$.MODULE$.NullDecider()) { - LOG.warn("EarlybirdDecider is uninitialized but requested feature " + feature); - } - - return earlybirdDecider.isAvailable(feature, Some.apply(recipient)); - } - - /** - * Get the raw decider value for a given feature. - * - * @param feature the feature name - * @return the integer value of the decider - */ - public static int getAvailability(String feature) { - return DeciderUtil.getAvailability(earlybirdDecider, feature); - } - - public static Decider getDecider() { - checkInitialized(); - return earlybirdDecider; - } - - public static MutableDecisionMaker getMutableDecisionMaker() { - checkInitialized(); - return mutableDecisionMaker; - } - - private static void checkInitialized() { - Preconditions.checkState(earlybirdDecider != Decider$.MODULE$.NullDecider(), - "EarlybirdDecider is not initialized."); - } -} diff --git a/src/java/com/twitter/search/earlybird/util/EarlybirdSearchResultUtil.docx b/src/java/com/twitter/search/earlybird/util/EarlybirdSearchResultUtil.docx new file mode 100644 index 000000000..dd5fad86f Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/EarlybirdSearchResultUtil.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/EarlybirdSearchResultUtil.java b/src/java/com/twitter/search/earlybird/util/EarlybirdSearchResultUtil.java deleted file mode 100644 index 22af0d8b3..000000000 --- a/src/java/com/twitter/search/earlybird/util/EarlybirdSearchResultUtil.java +++ /dev/null @@ -1,182 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.List; -import java.util.Map; -import java.util.Set; - -import javax.annotation.Nullable; - -import com.google.common.collect.ImmutableMap; - -import com.twitter.search.common.constants.thriftjava.ThriftLanguage; -import com.twitter.search.common.database.DatabaseConfig; -import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo; -import com.twitter.search.common.util.earlybird.ResultsUtil; -import com.twitter.search.common.util.earlybird.ThriftSearchResultUtil; -import com.twitter.search.common.util.earlybird.ThriftSearchResultsRelevanceStatsUtil; -import com.twitter.search.core.earlybird.facets.LanguageHistogram; -import com.twitter.search.earlybird.partition.PartitionConfig; -import com.twitter.search.earlybird.search.Hit; -import com.twitter.search.earlybird.search.SearchResultsInfo; -import com.twitter.search.earlybird.search.SimpleSearchResults; -import com.twitter.search.earlybird.search.relevance.RelevanceSearchResults; -import com.twitter.search.earlybird.thrift.ThriftSearchResult; -import com.twitter.search.earlybird.thrift.ThriftSearchResultDebugInfo; -import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -// EarlybirdSearchResultUtil contains some simple static methods for constructing -// ThriftSearchResult objects. -public final class EarlybirdSearchResultUtil { - public static final double MIN_LANGUAGE_RATIO_TO_KEEP = 0.002; - - private EarlybirdSearchResultUtil() { } - - /** - * Update result stats on the ThriftSearchResult. - */ - public static void setResultStatistics(ThriftSearchResults results, SearchResultsInfo info) { - results.setNumHitsProcessed(info.getNumHitsProcessed()); - results.setNumPartitionsEarlyTerminated(info.isEarlyTerminated() ? 1 : 0); - if (info.isSetSearchedStatusIDs()) { - results.setMaxSearchedStatusID(info.getMaxSearchedStatusID()); - results.setMinSearchedStatusID(info.getMinSearchedStatusID()); - } - - if (info.isSetSearchedTimes()) { - results.setMaxSearchedTimeSinceEpoch(info.getMaxSearchedTime()); - results.setMinSearchedTimeSinceEpoch(info.getMinSearchedTime()); - } - } - - /** - * Create an EarlyTerminationInfo based on information inside a SearchResultsInfo. - */ - public static EarlyTerminationInfo prepareEarlyTerminationInfo(SearchResultsInfo info) { - EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(info.isEarlyTerminated()); - if (info.isEarlyTerminated()) { - earlyTerminationInfo.setEarlyTerminationReason(info.getEarlyTerminationReason()); - } - return earlyTerminationInfo; - } - - /** - * Populate language histogram inside ThriftSerachResults. - */ - public static void setLanguageHistogram(ThriftSearchResults results, - LanguageHistogram languageHistogram) { - int sum = 0; - for (int value : languageHistogram.getLanguageHistogram()) { - sum += value; - } - if (sum == 0) { - return; - } - ImmutableMap.Builder builder = ImmutableMap.builder(); - int threshold = (int) (sum * MIN_LANGUAGE_RATIO_TO_KEEP); - for (Map.Entry entry : languageHistogram.getLanguageHistogramAsMap() - .entrySet()) { - if (entry.getValue() > threshold) { - builder.put(entry.getKey(), entry.getValue()); - } - } - Map langCounts = builder.build(); - if (langCounts.size() > 0) { - results.setLanguageHistogram(langCounts); - } - } - - private static void addDebugInfoToResults(List resultArray, - @Nullable PartitionConfig partitionConfig) { - if (partitionConfig == null) { - return; - } - ThriftSearchResultDebugInfo debugInfo = new ThriftSearchResultDebugInfo(); - debugInfo.setHostname(DatabaseConfig.getLocalHostname()); - // These info can also come from EarlybirdServer.get().getPartitionConfig() if we add such a - // getter for partitionConfig(). - debugInfo.setPartitionId(partitionConfig.getIndexingHashPartitionID()); - debugInfo.setTiername(partitionConfig.getTierName()); - debugInfo.setClusterName(partitionConfig.getClusterName()); - - for (ThriftSearchResult result : resultArray) { - result.setDebugInfo(debugInfo); - } - } - - /** - * Write results into the result array. - * @param resultArray the result array to write into. - * @param hits the hits from the search. - * @param partitionConfig partition config used to fill in debug info. Pass in null if no debug - * info should be written into results. - */ - public static void prepareResultsArray(List resultArray, - SimpleSearchResults hits, - @Nullable PartitionConfig partitionConfig) { - for (int i = 0; i < hits.numHits(); i++) { - final Hit hit = hits.getHit(i); - final long id = hit.getStatusID(); - final ThriftSearchResult result = new ThriftSearchResult(id); - final ThriftSearchResultMetadata resultMetadata = hit.getMetadata(); - result.setMetadata(resultMetadata); - resultArray.add(result); - } - addDebugInfoToResults(resultArray, partitionConfig); - } - - /** - * Write results into the result array. - * @param resultArray the result array to write into. - * @param hits the hits from the search. - * @param userIDWhitelist Used to set flag ThriftSearchResultMetadata.dontFilterUser. - * @param partitionConfig partition config used to fill in debug info. Pass in null if no debug - * info should be written into results. - */ - public static void prepareRelevanceResultsArray(List resultArray, - RelevanceSearchResults hits, - Set userIDWhitelist, - @Nullable PartitionConfig partitionConfig) { - for (int i = 0; i < hits.numHits(); i++) { - final long id = hits.getHit(i).getStatusID(); - final ThriftSearchResult result = new ThriftSearchResult(id); - final ThriftSearchResultMetadata resultMetadata = hits.resultMetadata[i]; - result.setMetadata(resultMetadata); - if (userIDWhitelist != null) { - resultMetadata.setDontFilterUser(userIDWhitelist.contains(resultMetadata.getFromUserId())); - } - - resultArray.add(result); - } - addDebugInfoToResults(resultArray, partitionConfig); - } - - /** - * Merge a List of ThriftSearchResults into a single ThriftSearchResults object. - */ - public static ThriftSearchResults mergeSearchResults(List allSearchResults) { - ThriftSearchResults mergedResults = new ThriftSearchResults(); - mergedResults.setRelevanceStats(new ThriftSearchResultsRelevanceStats()); - - mergedResults.setHitCounts(ResultsUtil.aggregateCountMap(allSearchResults, - ThriftSearchResultUtil.HIT_COUNTS_MAP_GETTER)); - - mergedResults.setLanguageHistogram(ResultsUtil.aggregateCountMap(allSearchResults, - ThriftSearchResultUtil.LANG_MAP_GETTER)); - - for (ThriftSearchResults searchResults : allSearchResults) { - // Add results - mergedResults.getResults().addAll(searchResults.getResults()); - // Update counts - ThriftSearchResultUtil.incrementCounts(mergedResults, searchResults); - // Update relevance stats - if (searchResults.getRelevanceStats() != null) { - ThriftSearchResultsRelevanceStatsUtil.addRelevanceStats(mergedResults.getRelevanceStats(), - searchResults.getRelevanceStats()); - } - } - - return mergedResults; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/FieldTermCounter.docx b/src/java/com/twitter/search/earlybird/util/FieldTermCounter.docx new file mode 100644 index 000000000..dfb3cd60a Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/FieldTermCounter.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/FieldTermCounter.java b/src/java/com/twitter/search/earlybird/util/FieldTermCounter.java deleted file mode 100644 index 29cece148..000000000 --- a/src/java/com/twitter/search/earlybird/util/FieldTermCounter.java +++ /dev/null @@ -1,304 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.Calendar; -import java.util.Collections; -import java.util.Map; -import java.util.TimeZone; -import java.util.concurrent.atomic.AtomicInteger; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; - -import org.apache.commons.lang.mutable.MutableInt; -import org.apache.commons.lang.mutable.MutableLong; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.metrics.SearchLongGauge; - -/** - * This class is used to count how many times a field happens in hourly and daily stats. - * It is used by TermCountMonitor for iterating all fields in the index. - * - * There is one exception that this class is also used to count the number of tweets in the index. - * Under the situation, the passed in fieldName would be empty string (as TWEET_COUNT_KEY). - */ -public class FieldTermCounter { - private static final Logger LOG = LoggerFactory.getLogger(FieldTermCounter.class); - - static final TimeZone TIME_ZONE = TimeZone.getTimeZone("GMT"); - static final String TWEET_COUNT_KEY = ""; - - private final String fieldName; - private final int instanceCounter; - - // The first date in format "YYYYMMDDHH" that we want to check counts for. - private final int startCheckHour; - // The last date in format "YYYYMMDDHH" that we want to check counts for. - private final int endCheckHour; - // Smallest number of docs we expect to have for each hour. - private final int hourlyMinCount; - //Smallest number of docs we expect to have for each day. - private final int dailyMinCount; - - // Count of tweets for each day, keyed of by the hour in the format "YYYYMMDD". - private final Map exportedHourlyCounts; - - // Count of tweets for each day, keyed of by the day in the format "YYYYMMDD". - private final Map dailyCounts; - - // Only export hourly stats that are below minimum threshold. - private final Map exportedStats; - - private final SearchLongGauge hoursWithNoTweetsStat; - private final SearchLongGauge daysWithNoTweetsStat; - - public FieldTermCounter( - String fieldName, - int instanceCounter, - int startCheckHour, - int endCheckHour, - int hourlyMinCount, - int dailyMinCount) { - this.fieldName = fieldName; - this.instanceCounter = instanceCounter; - this.startCheckHour = startCheckHour; - this.endCheckHour = endCheckHour; - this.hourlyMinCount = hourlyMinCount; - this.dailyMinCount = dailyMinCount; - this.exportedHourlyCounts = Maps.newHashMap(); - this.dailyCounts = Maps.newHashMap(); - this.exportedStats = Maps.newHashMap(); - - this.hoursWithNoTweetsStat = SearchLongGauge.export(getAggregatedNoTweetStatName(true)); - this.daysWithNoTweetsStat = SearchLongGauge.export(getAggregatedNoTweetStatName(false)); - } - - /** - * Updates the stats exported by this class based on the new counts provided in the given map. - */ - public void runWithNewCounts(Map newCounts) { - dailyCounts.clear(); - - // See go/rb/813442/#comment2566569 - // 1. Update all existing hours - updateExistingHourlyCounts(newCounts); - - // 2. Add and export all new hours - addAndExportNewHourlyCounts(newCounts); - - // 3. fill in all the missing hours between know min and max days. - fillMissingHourlyCounts(); - - // 4. Export as a stat, how many hours don't have any tweets (i.e. <= 0) - exportMissingTweetStats(); - } - - // Input: - // . the new hourly count map in the current iteration - // . the existing hourly count map before the current iteration - // If the hourly key matches from the new hourly map to the existing hourly count map, update - // the value of the existing hourly count map to the value from the new hourly count map. - private void updateExistingHourlyCounts(Map newCounts) { - for (Map.Entry exportedCount : exportedHourlyCounts.entrySet()) { - Integer date = exportedCount.getKey(); - AtomicInteger exportedCountValue = exportedCount.getValue(); - - MutableInt newCount = newCounts.get(date); - if (newCount == null) { - exportedCountValue.set(0); - } else { - exportedCountValue.set(newCount.intValue()); - // clean up so that we don't check this date again when we look for new hours - newCounts.remove(date); - } - } - } - - // Input: - // . the new hourly count map in the current iteration - // . the existing hourly count map before the current iteration - // This function is called after the above function of updateExistingHourlyCounts() so that all - // matching key value pairs have been removed from the new hourly count map. - // Move all remaining valid values from the new hourly count map to the existing hourly count - // map. - private void addAndExportNewHourlyCounts(Map newCounts) { - for (Map.Entry newCount : newCounts.entrySet()) { - Integer hour = newCount.getKey(); - MutableInt newCountValue = newCount.getValue(); - Preconditions.checkState(!exportedHourlyCounts.containsKey(hour), - "Should have already processed and removed existing hours: " + hour); - - AtomicInteger newStat = new AtomicInteger(newCountValue.intValue()); - exportedHourlyCounts.put(hour, newStat); - } - } - - // Find whether the existing hourly count map has hourly holes. If such holes exist, fill 0 - // values so that they can be exported. - private void fillMissingHourlyCounts() { - // Figure out the time range for which we should have tweets in the index. At the very least, - // this range should cover [startCheckHour, endCheckHour) if endCheckHour is set, or - // [startCheckHour, latestHourInTheIndexWithTweets] if endCheckHour is not set (latest tier or - // realtime cluster). - int startHour = startCheckHour; - int endHour = endCheckHour < getHourValue(Calendar.getInstance(TIME_ZONE)) ? endCheckHour : -1; - for (int next : exportedHourlyCounts.keySet()) { - if (next < startHour) { - startHour = next; - } - if (next > endHour) { - endHour = next; - } - } - - Calendar endHourCal = getCalendarValue(endHour); - Calendar hour = getCalendarValue(startHour); - for (; hour.before(endHourCal); hour.add(Calendar.HOUR_OF_DAY, 1)) { - int hourValue = getHourValue(hour); - if (!exportedHourlyCounts.containsKey(hourValue)) { - exportedHourlyCounts.put(hourValue, new AtomicInteger(0)); - } - } - } - - private void exportMissingTweetStats() { - int hoursWithNoTweets = 0; - int daysWithNoTweets = 0; - - for (Map.Entry hourlyCount : exportedHourlyCounts.entrySet()) { - int hour = hourlyCount.getKey(); - if ((hour < startCheckHour) || (hour >= endCheckHour)) { - continue; - } - - // roll up the days - int day = hour / 100; - MutableLong dayCount = dailyCounts.get(day); - if (dayCount == null) { - dailyCounts.put(day, new MutableLong(hourlyCount.getValue().get())); - } else { - dayCount.setValue(dayCount.longValue() + hourlyCount.getValue().get()); - } - AtomicInteger exportedCountValue = hourlyCount.getValue(); - if (exportedCountValue.get() <= hourlyMinCount) { - // We do not export hourly too few tweets for index fields as it can 10x the existing - // exported stats. - // We might consider whitelisting some high frequency fields later. - if (isFieldForTweet()) { - String statsName = getStatName(hourlyCount.getKey()); - SearchLongGauge stat = SearchLongGauge.export(statsName); - stat.set(exportedCountValue.longValue()); - exportedStats.put(statsName, stat); - } - LOG.warn("Found an hour with too few tweets. Field: <{}> Hour: {} count: {}", - fieldName, hour, exportedCountValue); - hoursWithNoTweets++; - } - } - - for (Map.Entry dailyCount : dailyCounts.entrySet()) { - if (dailyCount.getValue().longValue() <= dailyMinCount) { - LOG.warn("Found a day with too few tweets. Field: <{}> Day: {} count: {}", - fieldName, dailyCount.getKey(), dailyCount.getValue()); - daysWithNoTweets++; - } - } - - hoursWithNoTweetsStat.set(hoursWithNoTweets); - daysWithNoTweetsStat.set(daysWithNoTweets); - } - - // When the fieldName is empty string (as TWEET_COUNT_KEY), it means that we are counting the - // number of tweets for the index, not for some specific fields. - private boolean isFieldForTweet() { - return TWEET_COUNT_KEY.equals(fieldName); - } - - private String getAggregatedNoTweetStatName(boolean hourly) { - if (isFieldForTweet()) { - if (hourly) { - return "hours_with_no_indexed_tweets_v_" + instanceCounter; - } else { - return "days_with_no_indexed_tweets_v_" + instanceCounter; - } - } else { - if (hourly) { - return "hours_with_no_indexed_fields_v_" + fieldName + "_" + instanceCounter; - } else { - return "days_with_no_indexed_fields_v_" + fieldName + "_" + instanceCounter; - } - } - } - - @VisibleForTesting - String getStatName(Integer date) { - return getStatName(fieldName, instanceCounter, date); - } - - @VisibleForTesting - static String getStatName(String field, int instance, Integer date) { - if (TWEET_COUNT_KEY.equals(field)) { - return "tweets_indexed_on_hour_v_" + instance + "_" + date; - } else { - return "tweets_indexed_on_hour_v_" + instance + "_" + field + "_" + date; - } - } - - @VisibleForTesting - Map getExportedCounts() { - return Collections.unmodifiableMap(exportedHourlyCounts); - } - - @VisibleForTesting - Map getDailyCounts() { - return Collections.unmodifiableMap(dailyCounts); - } - - @VisibleForTesting - long getHoursWithNoTweets() { - return hoursWithNoTweetsStat.get(); - } - - @VisibleForTesting - long getDaysWithNoTweets() { - return daysWithNoTweetsStat.get(); - } - - @VisibleForTesting - Map getExportedHourlyCountStats() { - return exportedStats; - } - - /** - * Given a unit time in seconds since epoch UTC, will return the day in format "YYYYMMDDHH" - * as an int. - */ - @VisibleForTesting - static int getHourValue(Calendar cal, int timeSecs) { - cal.setTimeInMillis(timeSecs * 1000L); - return getHourValue(cal); - } - - static int getHourValue(Calendar cal) { - int year = cal.get(Calendar.YEAR) * 1000000; - int month = (cal.get(Calendar.MONTH) + 1) * 10000; // month is 0-based - int day = cal.get(Calendar.DAY_OF_MONTH) * 100; - int hour = cal.get(Calendar.HOUR_OF_DAY); - return year + month + day + hour; - } - - @VisibleForTesting - static Calendar getCalendarValue(int hour) { - Calendar cal = Calendar.getInstance(TIME_ZONE); - - int year = hour / 1000000; - int month = ((hour / 10000) % 100) - 1; // 0-based - int day = (hour / 100) % 100; - int hr = hour % 100; - cal.setTimeInMillis(0); // reset all time fields - cal.set(year, month, day, hr, 0); - return cal; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/Histogram.docx b/src/java/com/twitter/search/earlybird/util/Histogram.docx new file mode 100644 index 000000000..8d65640ca Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/Histogram.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/Histogram.java b/src/java/com/twitter/search/earlybird/util/Histogram.java deleted file mode 100644 index ccf40a64e..000000000 --- a/src/java/com/twitter/search/earlybird/util/Histogram.java +++ /dev/null @@ -1,160 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import com.google.common.base.Preconditions; - -/** - * A histogram of int values with arbitrary buckets. - * Keeps a count for each bucket, and a sum of values for each bucket. - * The histogram view is returned as a list of {@link Histogram.Entry}s. - *

- * Bucket boundaries are inclusive on the upper boundaries. Given buckets of [0, 10, 100], - * items will be places in 4 bins, { X <= 0, 0 < X <= 10, 10 < X <= 100, X > 100 }. - *

- * This class is not thread safe. - * - */ -public class Histogram { - private final double[] buckets; - private final int[] itemsCount; - private final long[] itemsSum; - private int totalCount; - private long totalSum; - - public static class Entry { - private final String bucketName; - private final int count; - private final double countPercent; - private final double countCumulative; - private final long sum; - private final double sumPercent; - private final double sumCumulative; - - Entry(String bucketName, - int count, double countPercent, double countCumulative, - long sum, double sumPercent, double sumCumulative) { - this.bucketName = bucketName; - this.count = count; - this.countPercent = countPercent; - this.countCumulative = countCumulative; - this.sum = sum; - this.sumPercent = sumPercent; - this.sumCumulative = sumCumulative; - } - - public String getBucketName() { - return bucketName; - } - - public int getCount() { - return count; - } - - public double getCountPercent() { - return countPercent; - } - - public double getCountCumulative() { - return countCumulative; - } - - public long getSum() { - return sum; - } - - public double getSumPercent() { - return sumPercent; - } - - public double getSumCumulative() { - return sumCumulative; - } - } - - /** - * No buckets will put all items into a single bin. - * @param buckets the buckets to use for binnning data. - * An item will be put in bin i if item <= buckets[i] and > buckets[i-1] - * The bucket values must be strictly increasing. - */ - public Histogram(double... buckets) { - Preconditions.checkNotNull(buckets); - this.buckets = new double[buckets.length]; - for (int i = 0; i < buckets.length; i++) { - this.buckets[i] = buckets[i]; - if (i > 0) { - Preconditions.checkState(this.buckets[i - 1] < this.buckets[i], - "Histogram buckets must me strictly increasing: " + Arrays.toString(buckets)); - } - } - this.itemsCount = new int[buckets.length + 1]; - this.itemsSum = new long[buckets.length + 1]; - this.totalCount = 0; - this.totalSum = 0; - } - - /** - * Add the given item to the appropriate bucket. - */ - public void addItem(double item) { - int i = 0; - for (; i < this.buckets.length; i++) { - if (item <= buckets[i]) { - break; - } - } - this.itemsCount[i]++; - this.totalCount++; - this.itemsSum[i] += item; - this.totalSum += item; - } - - /** - * returns the current view of all the bins. - */ - public List entries() { - List entries = new ArrayList<>(this.itemsCount.length); - double countCumulative = 0; - double sumCumulative = 0; - for (int i = 0; i < this.itemsCount.length; i++) { - String bucketName; - if (i < this.buckets.length) { - bucketName = "<= " + this.buckets[i]; - } else if (this.buckets.length > 0) { - bucketName = " > " + this.buckets[this.buckets.length - 1]; - } else { - bucketName = " * "; - } - - int count = this.itemsCount[i]; - double countPercent = this.totalCount == 0 ? 0 : ((double) this.itemsCount[i]) / totalCount; - countCumulative += countPercent; - - long sum = this.itemsSum[i]; - double sumPercent = this.totalSum == 0 ? 0 : ((double) this.itemsSum[i]) / totalSum; - sumCumulative += sumPercent; - - Entry e = new Entry(bucketName, count, countPercent, countCumulative, - sum, sumPercent, sumCumulative); - entries.add(e); - } - return entries; - } - - /** - * Returns total number of items seen. - */ - public int getTotalCount() { - return totalCount; - } - - /** - * Returns sum of all the items seen. - */ - public long getTotalSum() { - return totalSum; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/IndexViewer.docx b/src/java/com/twitter/search/earlybird/util/IndexViewer.docx new file mode 100644 index 000000000..1ccbc17d3 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/IndexViewer.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/IndexViewer.java b/src/java/com/twitter/search/earlybird/util/IndexViewer.java deleted file mode 100644 index d8966a611..000000000 --- a/src/java/com/twitter/search/earlybird/util/IndexViewer.java +++ /dev/null @@ -1,798 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Locale; -import java.util.Set; -import java.util.TreeSet; - -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; - -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; - -import com.twitter.search.common.constants.thriftjava.ThriftLanguage; -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant; -import com.twitter.search.common.schema.thriftjava.ThriftCSFType; -import com.twitter.search.common.util.analysis.IntTermAttributeImpl; -import com.twitter.search.common.util.analysis.LongTermAttributeImpl; -import com.twitter.search.common.util.analysis.SortableLongTermAttributeImpl; -import com.twitter.search.common.util.spatial.GeoUtil; -import com.twitter.search.core.earlybird.index.DocIDToTweetIDMapper; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.inverted.MPHTermDictionary; -import com.twitter.search.core.earlybird.index.inverted.RealtimeIndexTerms; -import com.twitter.search.earlybird.index.EarlybirdSingleSegmentSearcher; - -import geo.google.datamodel.GeoCoordinate; - -public class IndexViewer { - /** - * Fields whose terms are indexed using - * {@link com.twitter.search.common.util.analysis.IntTermAttribute} - */ - private static final Set INT_TERM_ATTRIBUTE_FIELDS = ImmutableSet.of( - EarlybirdFieldConstant.CREATED_AT_FIELD.getFieldName(), - EarlybirdFieldConstant.LINK_CATEGORY_FIELD.getFieldName(), - EarlybirdFieldConstant - .NORMALIZED_FAVORITE_COUNT_GREATER_THAN_OR_EQUAL_TO_FIELD.getFieldName(), - EarlybirdFieldConstant - .NORMALIZED_REPLY_COUNT_GREATER_THAN_OR_EQUAL_TO_FIELD.getFieldName(), - EarlybirdFieldConstant - .NORMALIZED_RETWEET_COUNT_GREATER_THAN_OR_EQUAL_TO_FIELD.getFieldName(), - EarlybirdFieldConstant.COMPOSER_SOURCE.getFieldName()); - - /** - * Fields whose terms are indexed using - * {@link com.twitter.search.common.util.analysis.LongTermAttribute} - */ - private static final Set LONG_TERM_ATTRIBUTE_FIELDS = ImmutableSet.of( - EarlybirdFieldConstant.CONVERSATION_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.LIKED_BY_USER_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.QUOTED_TWEET_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.QUOTED_USER_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.REPLIED_TO_BY_USER_ID.getFieldName(), - EarlybirdFieldConstant.RETWEETED_BY_USER_ID.getFieldName(), - EarlybirdFieldConstant.DIRECTED_AT_USER_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.FROM_USER_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.IN_REPLY_TO_TWEET_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.IN_REPLY_TO_USER_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.RETWEET_SOURCE_TWEET_ID_FIELD.getFieldName(), - EarlybirdFieldConstant.RETWEET_SOURCE_USER_ID_FIELD.getFieldName()); - - /** - * Fields whose terms index using SORTED - * {@link com.twitter.search.common.util.analysis.LongTermAttribute} - */ - private static final Set SORTED_LONG_TERM_ATTRIBUTE_FIELDS = - ImmutableSet.of(EarlybirdFieldConstant.ID_FIELD.getFieldName()); - - private final EarlybirdSingleSegmentSearcher searcher; - private final EarlybirdIndexSegmentAtomicReader twitterReader; - - public long getTimeSliceId() { - return searcher.getTimeSliceID(); - } - - public static class Options { - private boolean dumpHexTerms = false; - private String charset; - private double[] histogramBuckets; - private boolean termLengthHistogram; - - public Options setDumpHexTerms(boolean dumpHexTermsParam) { - this.dumpHexTerms = dumpHexTermsParam; - return this; - } - - public Options setCharset(String charsetParam) { - this.charset = charsetParam; - return this; - } - - public Options setHistogramBuckets(double[] histogramBucketsParam) { - this.histogramBuckets = histogramBucketsParam; - return this; - } - - public Options setTermLengthHistogram(boolean termLengthHistogramParam) { - this.termLengthHistogram = termLengthHistogramParam; - return this; - } - } - - /** - * Data Transfer Object for Terms, encapsulates the "json" serialization - * while maintaining streaming mode - */ - private static class TermDto { - - private final String field; - private final String term; - private final String docFreq; - private final String percent; - private final PostingsEnum docsEnum; - private final TermsEnum termsEnum; - private final Integer maxDocs; - - public TermDto(String field, String term, String docFreq, String percent, - PostingsEnum docsEnum, TermsEnum termsEnum, Integer maxDocs) { - this.field = field; - this.term = term; - this.docFreq = docFreq; - this.percent = percent; - this.docsEnum = docsEnum; - this.termsEnum = termsEnum; - this.maxDocs = maxDocs; - } - - public void write(ViewerWriter writer, - EarlybirdIndexSegmentAtomicReader twitterReader) throws IOException { - writer.beginObject(); - writer.name("field").value(field); - writer.name("term").value(term); - writer.name("docFreq").value(docFreq); - writer.name("percent").value(percent); - if (docsEnum != null) { - appendFrequencyAndPositions(writer, field, docsEnum, twitterReader); - } - if (maxDocs != null) { - appendDocs(writer, termsEnum, maxDocs, twitterReader); - } - writer.endObject(); - } - } - - /** - * Data Transfer Object for Terms, encapsulates the "json" serialization - * while maintaining streaming mode - */ - private static class StatsDto { - - private final String field; - private final String numTerms; - private final String terms; - - - public StatsDto(String field, String numTerms, String terms) { - this.field = field; - this.numTerms = numTerms; - this.terms = terms; - } - - public void write(ViewerWriter writer) throws IOException { - writer.beginObject(); - - writer.name("field").value(field); - writer.name("numTerms").value(numTerms); - writer.name("terms").value(terms); - - writer.endObject(); - } - } - - public IndexViewer(EarlybirdSingleSegmentSearcher searcher) { - this.searcher = searcher; - this.twitterReader = searcher.getTwitterIndexReader(); - } - - private boolean shouldSeekExact(Terms terms, TermsEnum termsEnum) { - return terms instanceof RealtimeIndexTerms - || termsEnum instanceof MPHTermDictionary.MPHTermsEnum; - } - - /** - * Dumps all terms for a given tweet id. - * @param writer writer being used - * @param tweetId the tweet id to use - */ - public void dumpTweetDataByTweetId(ViewerWriter writer, long tweetId, Options options) - throws IOException { - int docId = twitterReader.getSegmentData().getDocIDToTweetIDMapper().getDocID(tweetId); - dumpTweetDataByDocId(writer, docId, options); - } - - /** - * Dumps all terms for a given doc id. - * @param writer writer being used - * @param docId the document id to use. - */ - public void dumpTweetDataByDocId(ViewerWriter writer, int docId, Options options) - throws IOException { - writer.beginObject(); - - printHeader(writer); - long tweetID = twitterReader.getSegmentData().getDocIDToTweetIDMapper().getTweetID(docId); - if (docId < twitterReader.maxDoc() && tweetID >= 0) { - writer.name("docId").value(Integer.toString(docId)); - writer.name("tweetId").value(Long.toString(tweetID)); - dumpIndexedFields(writer, docId, options); - dumpCsfFields(writer, docId); - } - writer.endObject(); - } - - /** - * Dumps all tweet IDs in the current segment to the given file. - */ - public void dumpTweetIds(ViewerWriter writer, String logFile, PrintWriter logWriter) - throws IOException { - writeTweetIdsToLogFile(logWriter); - - writer.beginObject(); - writer.name(Long.toString(searcher.getTimeSliceID())).value(logFile); - writer.endObject(); - } - - private void writeTweetIdsToLogFile(PrintWriter logWriter) { - DocIDToTweetIDMapper mapper = twitterReader.getSegmentData().getDocIDToTweetIDMapper(); - int docId = Integer.MIN_VALUE; - while ((docId = mapper.getNextDocID(docId)) != DocIDToTweetIDMapper.ID_NOT_FOUND) { - long tweetId = mapper.getTweetID(docId); - - // Ensure tweet ID is valid and non-deleted - if ((tweetId > 0) && !twitterReader.getDeletesView().isDeleted(docId)) { - logWriter.println(tweetId); - } - } - } - - private void dumpIndexedFields(ViewerWriter writer, int docId, - Options options) throws IOException { - writer.name("indexedFields"); - writer.beginArray(); - writer.newline(); - for (String field : sortedFields()) { - dumpTweetData(writer, field, docId, options); - } - writer.endArray(); - writer.newline(); - } - - private void dumpCsfFields(ViewerWriter writer, int docId) throws IOException { - writer.name("csfFields"); - writer.beginArray(); - writer.newline(); - dumpCSFData(writer, docId); - - writer.endArray(); - } - - /** - * Dumps all CSF values for a given doc id. - * @param writer writer being used - * @param docId the document id to use. - */ - private void dumpCSFData(ViewerWriter writer, int docId) throws IOException { - Schema tweetSchema = twitterReader.getSchema(); - - // Sort the FieldInfo objects to generate fixed order to make testing easier - List sortedFieldInfos = new ArrayList<>(tweetSchema.getFieldInfos()); - sortedFieldInfos.sort(Comparator.comparing(Schema.FieldInfo::getFieldId)); - - for (Schema.FieldInfo fieldInfo: sortedFieldInfos) { - String csfFieldInfoName = fieldInfo.getName(); - ThriftCSFType csfType = tweetSchema.getCSFFieldType(csfFieldInfoName); - NumericDocValues csfDocValues = twitterReader.getNumericDocValues(csfFieldInfoName); - // If twitterReader.getNumericDocValues(value.getName()) == null, - // means no NumericDocValue was indexed for the field so ignore - if (csfType != null && csfDocValues != null && csfDocValues.advanceExact(docId)) { - long csfValue = csfDocValues.longValue(); - writer.beginObject(); - writer.name("field").value(formatField(csfFieldInfoName)); - writer.name("value"); - if (csfFieldInfoName.equals(EarlybirdFieldConstant.LAT_LON_CSF_FIELD.getFieldName())) { - writer.value(latlongDecode(csfValue)); - } else if (csfFieldInfoName.equals(EarlybirdFieldConstant.LANGUAGE.getFieldName())) { - writer.value(languageDecode(csfValue)); - } else if (csfFieldInfoName.equals(EarlybirdFieldConstant.CARD_LANG_CSF.getFieldName())) { - writer.value(languageDecode(csfValue)); - } else { - writer.value(Long.toString(csfValue)); - } - writer.endObject(); - writer.newline(); - } - } - } - - /** - * Decipher long value gotten, put into format (lat, lon) - * Decode the stored long value by creating a geocode - */ - private String latlongDecode(long csfValue) { - StringBuilder sb = new StringBuilder(); - GeoCoordinate geoCoordinate = new GeoCoordinate(); - if (GeoUtil.decodeLatLonFromInt64(csfValue, geoCoordinate)) { - sb.append(geoCoordinate.getLatitude()).append(", ").append(geoCoordinate.getLongitude()); - } else { - sb.append(csfValue).append(" (Value Unset or Invalid Coordinate)"); - } - return sb.toString(); - } - - /** - * Decipher long value gotten into string of tweet's language - */ - private String languageDecode(long csfValue) { - StringBuilder sb = new StringBuilder(); - ThriftLanguage languageType = ThriftLanguage.findByValue((int) csfValue); - sb.append(csfValue).append(" (").append(languageType).append(")"); - return sb.toString(); - } - - private void dumpTweetData(ViewerWriter writer, - String field, - int docId, - Options options) throws IOException { - - Terms terms = twitterReader.terms(field); - if (terms != null) { - TermsEnum termsEnum = terms.iterator(); - if (shouldSeekExact(terms, termsEnum)) { - long numTerms = terms.size(); - for (int i = 0; i < numTerms; i++) { - termsEnum.seekExact(i); - dumpTweetDataTerm(writer, field, termsEnum, docId, options); - } - } else { - while (termsEnum.next() != null) { - dumpTweetDataTerm(writer, field, termsEnum, docId, options); - } - } - } - } - - private void dumpTweetDataTerm(ViewerWriter writer, String field, TermsEnum termsEnum, - int docId, Options options) throws IOException { - PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL); - if (docsAndPositionsEnum != null && docsAndPositionsEnum.advance(docId) == docId) { - printTerm(writer, field, termsEnum, docsAndPositionsEnum, null, options); - } - } - - /** - * Prints the histogram for the currently viewed index. - * @param writer current viewerWriter - * @param field if null, will use all fields - * @param options options for dumping out text - */ - public void dumpHistogram(ViewerWriter writer, String field, Options options) throws IOException { - writer.beginObject(); - printHeader(writer); - writer.name("histogram"); - writer.beginArray(); - writer.newline(); - if (field == null) { - for (String field2 : sortedFields()) { - dumpFieldHistogram(writer, field2, options); - } - } else { - dumpFieldHistogram(writer, field, options); - } - writer.endArray(); - writer.endObject(); - } - - private void dumpFieldHistogram(ViewerWriter writer, String field, Options options) - throws IOException { - Histogram histo = new Histogram(options.histogramBuckets); - - Terms terms = twitterReader.terms(field); - if (terms != null) { - TermsEnum termsEnum = terms.iterator(); - if (shouldSeekExact(terms, termsEnum)) { - long numTerms = terms.size(); - for (int i = 0; i < numTerms; i++) { - termsEnum.seekExact(i); - countHistogram(options, histo, termsEnum); - } - } else { - while (termsEnum.next() != null) { - countHistogram(options, histo, termsEnum); - } - } - printHistogram(writer, field, options, histo); - } - } - - private void printHistogram(ViewerWriter writer, String field, Options options, - Histogram histo) throws IOException { - - String bucket = options.termLengthHistogram ? "termLength" : "df"; - for (Histogram.Entry histEntry : histo.entries()) { - String format = - String.format(Locale.US, - "field: %s %sBucket: %11s count: %10d " - + "percent: %6.2f%% cumulative: %6.2f%% totalCount: %10d" - + " sum: %15d percent: %6.2f%% cumulative: %6.2f%% totalSum: %15d", - formatField(field), - bucket, - histEntry.getBucketName(), - histEntry.getCount(), - histEntry.getCountPercent() * 100.0, - histEntry.getCountCumulative() * 100.0, - histo.getTotalCount(), - histEntry.getSum(), - histEntry.getSumPercent() * 100.0, - histEntry.getSumCumulative() * 100.0, - histo.getTotalSum() - ); - writer.value(format); - writer.newline(); - } - } - - private void countHistogram(Options options, Histogram histo, TermsEnum termsEnum) - throws IOException { - if (options.termLengthHistogram) { - final BytesRef bytesRef = termsEnum.term(); - histo.addItem(bytesRef.length); - } else { - histo.addItem(termsEnum.docFreq()); - } - } - - - /** - * Prints terms and optionally documents for the currently viewed index. - * @param writer writer being used - * @param field if null, will use all fields - * @param term if null will use all terms - * @param maxTerms will print at most this many terms per field. If null will print 0 terms. - * @param maxDocs will print at most this many documents, If null, will not print docs. - * @param options options for dumping out text - */ - public void dumpData(ViewerWriter writer, String field, String term, Integer maxTerms, - Integer maxDocs, Options options, boolean shouldSeekToTerm) throws IOException { - - writer.beginObject(); - printHeader(writer); - - writer.name("terms"); - writer.beginArray(); - writer.newline(); - dumpDataInternal(writer, field, term, maxTerms, maxDocs, options, shouldSeekToTerm); - writer.endArray(); - writer.endObject(); - } - - private void dumpDataInternal(ViewerWriter writer, String field, String term, Integer maxTerms, - Integer maxDocs, Options options, boolean shouldSeekToTerm) throws IOException { - - if (field == null) { - dumpDataForAllFields(writer, term, maxTerms, maxDocs, options); - return; - } - if (term == null) { - dumpDataForAllTerms(writer, field, maxTerms, maxDocs, options); - return; - } - Terms terms = twitterReader.terms(field); - if (terms != null) { - TermsEnum termsEnum = terms.iterator(); - TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(term)); - if (status == TermsEnum.SeekStatus.FOUND) { - printTerm(writer, field, termsEnum, null, maxDocs, options); - } - if (shouldSeekToTerm) { - dumpTermsAfterSeek(writer, field, terms, maxTerms, maxDocs, options, termsEnum, status); - } - } - } - - /** - * if term (cursor) is found for an indexed segment - dump the next termsLeft words - * starting from the current position in the enum. For an indexed segment, - * seekCeil will place the enum at the word or the next "ceiling" term. For - * a realtime index, if the word is not found we do not paginate anything - * We also only paginate if the TermsEnum is not at the end. - */ - private void dumpTermsAfterSeek(ViewerWriter writer, String field, Terms terms, Integer maxTerms, - Integer maxDocs, Options options, TermsEnum termsEnum, TermsEnum.SeekStatus status) - throws IOException { - if (status != TermsEnum.SeekStatus.END) { - // for realtime, to not repeat the found word - if (shouldSeekExact(terms, termsEnum)) { - termsEnum.next(); - } - if (status != TermsEnum.SeekStatus.FOUND) { - // if not found, print out curr term before calling next() - printTerm(writer, field, termsEnum, null, maxDocs, options); - } - for (int termsLeft = maxTerms - 1; termsLeft > 0 && termsEnum.next() != null; termsLeft--) { - printTerm(writer, field, termsEnum, null, maxDocs, options); - } - } - } - - private void dumpDataForAllFields(ViewerWriter writer, String term, Integer maxTerms, - Integer maxDocs, Options options) throws IOException { - for (String field : sortedFields()) { - dumpDataInternal(writer, field, term, maxTerms, maxDocs, options, false); - } - } - - private List sortedFields() { - // Tweet facets are added to a special $facets field, which is not part of the schema. - // We include it here, because seeing the facets for a tweet is generally useful. - List fields = Lists.newArrayList("$facets"); - for (Schema.FieldInfo fieldInfo : twitterReader.getSchema().getFieldInfos()) { - if (fieldInfo.getFieldType().indexOptions() != IndexOptions.NONE) { - fields.add(fieldInfo.getName()); - } - } - Collections.sort(fields); - return fields; - } - - private void dumpDataForAllTerms(ViewerWriter writer, - String field, - Integer maxTerms, - Integer maxDocs, - Options options) throws IOException { - Terms terms = twitterReader.terms(field); - if (terms != null) { - TermsEnum termsEnum = terms.iterator(); - if (shouldSeekExact(terms, termsEnum)) { - long numTerms = terms.size(); - long termToDump = maxTerms == null ? 0 : Math.min(numTerms, maxTerms); - for (int i = 0; i < termToDump; i++) { - termsEnum.seekExact(i); - printTerm(writer, field, termsEnum, null, maxDocs, options); - } - } else { - int max = maxTerms == null ? 0 : maxTerms; - while (max > 0 && termsEnum.next() != null) { - printTerm(writer, field, termsEnum, null, maxDocs, options); - max--; - } - } - } - } - - private String termToString(String field, BytesRef bytesTerm, Options options) - throws UnsupportedEncodingException { - if (INT_TERM_ATTRIBUTE_FIELDS.contains(field)) { - return Integer.toString(IntTermAttributeImpl.copyBytesRefToInt(bytesTerm)); - } else if (LONG_TERM_ATTRIBUTE_FIELDS.contains(field)) { - return Long.toString(LongTermAttributeImpl.copyBytesRefToLong(bytesTerm)); - } else if (SORTED_LONG_TERM_ATTRIBUTE_FIELDS.contains(field)) { - return Long.toString(SortableLongTermAttributeImpl.copyBytesRefToLong(bytesTerm)); - } else { - if (options != null && options.charset != null && !options.charset.isEmpty()) { - return new String(bytesTerm.bytes, bytesTerm.offset, bytesTerm.length, options.charset); - } else { - return bytesTerm.utf8ToString(); - } - } - } - - private void printTerm(ViewerWriter writer, String field, TermsEnum termsEnum, - PostingsEnum docsEnum, Integer maxDocs, Options options) - throws IOException { - final BytesRef bytesRef = termsEnum.term(); - StringBuilder termToString = new StringBuilder(); - termToString.append(termToString(field, bytesRef, options)); - if (options != null && options.dumpHexTerms) { - termToString.append(" ").append(bytesRef.toString()); - } - final int df = termsEnum.docFreq(); - double dfPercent = ((double) df / this.twitterReader.numDocs()) * 100.0; - TermDto termDto = new TermDto(field, termToString.toString(), Integer.toString(df), - String.format(Locale.US, "%.2f%%", dfPercent), - docsEnum, termsEnum, maxDocs); - termDto.write(writer, twitterReader); - writer.newline(); - } - - private static void appendFrequencyAndPositions(ViewerWriter writer, String field, - PostingsEnum docsEnum, EarlybirdIndexSegmentAtomicReader twitterReader) throws IOException { - final int frequency = docsEnum.freq(); - writer.name("freq").value(Integer.toString(frequency)); - - Schema schema = twitterReader.getSchema(); - Schema.FieldInfo fieldInfo = schema.getFieldInfo(field); - - if (fieldInfo != null - && (fieldInfo.getFieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS - || fieldInfo.getFieldType().indexOptions() - == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)) { - appendPositions(writer, docsEnum); - } - } - - private static void appendPositions(ViewerWriter writer, PostingsEnum docsAndPositionsEnum) - throws IOException { - writer.name("positions"); - - writer.beginArray(); - final int frequency = docsAndPositionsEnum.freq(); - for (int i = 0; i < frequency; i++) { - int position = docsAndPositionsEnum.nextPosition(); - writer.value(Integer.toString(position)); - } - writer.endArray(); - } - - private static void appendDocs(ViewerWriter writer, TermsEnum termsEnum, int maxDocs, - EarlybirdIndexSegmentAtomicReader twitterReader) - throws IOException { - writer.name("docIds"); - - writer.beginArray(); - - PostingsEnum docs = termsEnum.postings(null, 0); - int docsReturned = 0; - int docId; - boolean endedEarly = false; - DocIDToTweetIDMapper mapper = twitterReader.getSegmentData().getDocIDToTweetIDMapper(); - while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (docsReturned < maxDocs) { - docsReturned++; - long tweetID = mapper.getTweetID(docId); - - writer.beginObject(); - writer.name("docId").value(Long.toString(docId)); - writer.name("tweetId").value(Long.toString(tweetID)); - writer.endObject(); - } else { - endedEarly = true; - break; - } - } - if (endedEarly) { - writer.beginObject(); - writer.name("status").value("ended early"); - writer.endObject(); - } - writer.endArray(); - } - - /** - * Prints generic stats for all fields in the currently viewed index. - */ - public void dumpStats(ViewerWriter writer) throws IOException { - writer.beginObject(); - - printHeader(writer); - // stats section - writer.name("stats"); - writer.beginArray(); - writer.newline(); - for (String field : sortedFields()) { - Terms terms = twitterReader.terms(field); - if (terms != null) { - printStats(writer, field, terms); - } - } - writer.endArray(); - writer.endObject(); - } - - private void printStats(ViewerWriter writer, String field, Terms terms) throws IOException { - StatsDto statsDto = new StatsDto( - field, String.valueOf(terms.size()), terms.getClass().getCanonicalName()); - statsDto.write(writer); - writer.newline(); - } - - private void printHeader(ViewerWriter writer) throws IOException { - writer.name("timeSliceId").value(Long.toString(this.searcher.getTimeSliceID())); - writer.name("maxDocNumber").value(Integer.toString(this.twitterReader.maxDoc())); - writer.newline(); - } - - private static String formatField(String field) { - return String.format("%20s", field); - } - - /** - * Dumps out the schema of the current segment. - * @param writer to be used for printing - */ - public void dumpSchema(ViewerWriter writer) throws IOException { - writer.beginObject(); - printHeader(writer); - writer.name("schemaFields"); - writer.beginArray(); - writer.newline(); - Schema schema = this.twitterReader.getSchema(); - // The fields in the schema are not sorted. Sort them so that the output is deterministic - Set fieldNameSet = new TreeSet<>(); - for (Schema.FieldInfo fieldInfo: schema.getFieldInfos()) { - fieldNameSet.add(fieldInfo.getName()); - } - for (String fieldName : fieldNameSet) { - writer.value(fieldName); - writer.newline(); - } - writer.endArray(); - writer.endObject(); - } - - /** - * Dumps out the indexed fields inside the current segment. - * Mainly used to help the front end populate the fields. - * @param writer writer to be used for printing - */ - public void dumpFields(ViewerWriter writer) throws IOException { - writer.beginObject(); - printHeader(writer); - writer.name("fields"); - writer.beginArray(); - writer.newline(); - for (String field : sortedFields()) { - writer.value(field); - writer.newline(); - } - writer.endArray(); - writer.endObject(); - } - - /** - * Dumps out the mapping of the tweet/tweetId to - * a docId as well as segment/timeslide pair. - * @param writer writer to be used for writing - * @param tweetId tweetId that is input by user - */ - public void dumpTweetIdToDocIdMapping(ViewerWriter writer, long tweetId) throws IOException { - writer.beginObject(); - printHeader(writer); - writer.name("tweetId").value(Long.toString(tweetId)); - int docId = twitterReader.getSegmentData().getDocIDToTweetIDMapper().getDocID(tweetId); - - writer.name("docId").value(Integer.toString(docId)); - writer.endObject(); - writer.newline(); - } - - /** - * Dumps out the mapping of the docId to - * tweetId and timeslice/segmentId pairs. - * @param writer writer to be used for writing - * @param docid docId that is input by user - */ - public void dumpDocIdToTweetIdMapping(ViewerWriter writer, int docid) throws IOException { - writer.beginObject(); - printHeader(writer); - long tweetId = twitterReader.getSegmentData().getDocIDToTweetIDMapper().getTweetID(docid); - - writer.name("tweetId"); - if (tweetId >= 0) { - writer.value(Long.toString(tweetId)); - } else { - writer.value("Does not exist in segment"); - } - writer.name("docid").value(Integer.toString(docid)); - writer.endObject(); - } - - /** - * Print a response indicating that the given tweet id is not found in the index. - * - * Note that this method does not actually need the underlying index, and hence is setup as - * a util function. - */ - public static void writeTweetDoesNotExistResponse(ViewerWriter writer, long tweetId) - throws IOException { - writer.beginObject(); - writer.name("tweetId"); - writer.value(Long.toString(tweetId)); - writer.name("docId"); - writer.value("does not exist on this earlybird."); - writer.endObject(); - } -} diff --git a/src/java/com/twitter/search/earlybird/util/JsonViewerWriter.docx b/src/java/com/twitter/search/earlybird/util/JsonViewerWriter.docx new file mode 100644 index 000000000..5c7ebe1cf Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/JsonViewerWriter.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/JsonViewerWriter.java b/src/java/com/twitter/search/earlybird/util/JsonViewerWriter.java deleted file mode 100644 index 0672a76be..000000000 --- a/src/java/com/twitter/search/earlybird/util/JsonViewerWriter.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.io.IOException; -import java.io.Writer; - -import com.google.gson.stream.JsonWriter; - -/** - * Wrapper class for JsonWriter that implements the - * ViewerWriter interface. - */ -public class JsonViewerWriter implements ViewerWriter { - - private final JsonWriter writer; - private final Writer out; - - public JsonViewerWriter(Writer out) { - this.out = out; - this.writer = new JsonWriter(out); - } - - - @Override - public ViewerWriter beginArray() throws IOException { - writer.beginArray(); - return this; - } - - @Override - public ViewerWriter beginObject() throws IOException { - writer.beginObject(); - return this; - } - - @Override - public ViewerWriter endArray() throws IOException { - writer.endArray(); - return this; - } - - @Override - public ViewerWriter endObject() throws IOException { - writer.endObject(); - return this; - } - - @Override - public ViewerWriter name(String field) throws IOException { - writer.name(field); - return this; - } - - @Override - public ViewerWriter value(String s) throws IOException { - writer.value(s); - return this; - } - - @Override - public ViewerWriter newline() throws IOException { - out.append('\n'); - return this; - } - - public void flush() throws IOException { - out.flush(); - } -} diff --git a/src/java/com/twitter/search/earlybird/util/OneTaskScheduledExecutorManager.docx b/src/java/com/twitter/search/earlybird/util/OneTaskScheduledExecutorManager.docx new file mode 100644 index 000000000..2b8527a52 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/OneTaskScheduledExecutorManager.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/OneTaskScheduledExecutorManager.java b/src/java/com/twitter/search/earlybird/util/OneTaskScheduledExecutorManager.java deleted file mode 100644 index cdd9d50c3..000000000 --- a/src/java/com/twitter/search/earlybird/util/OneTaskScheduledExecutorManager.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.io.Closeable; -import java.io.IOException; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.concurrent.ScheduledExecutorServiceFactory; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchStatsReceiver; -import com.twitter.search.earlybird.exception.CriticalExceptionHandler; - -/** - * Executes a single periodic task. - */ -public abstract class OneTaskScheduledExecutorManager - extends ScheduledExecutorManager implements Closeable { - private final ScheduledExecutorTask scheduledTask; - private final PeriodicActionParams periodicActionParams; - - public OneTaskScheduledExecutorManager( - ScheduledExecutorServiceFactory executorServiceFactory, - String threadNameFormat, - boolean isDaemon, - PeriodicActionParams periodicActionParams, - ShutdownWaitTimeParams shutdownTiming, - SearchStatsReceiver searchStatsReceiver, - CriticalExceptionHandler criticalExceptionHandler) { - this(executorServiceFactory.build(threadNameFormat, isDaemon), periodicActionParams, - shutdownTiming, searchStatsReceiver, criticalExceptionHandler); - } - - public OneTaskScheduledExecutorManager( - ScheduledExecutorService executor, - PeriodicActionParams periodicActionParams, - ShutdownWaitTimeParams shutdownTiming, - SearchStatsReceiver searchStatsReceiver, - CriticalExceptionHandler criticalExceptionHandler) { - this(executor, periodicActionParams, shutdownTiming, searchStatsReceiver, null, - criticalExceptionHandler, Clock.SYSTEM_CLOCK); - } - - public OneTaskScheduledExecutorManager( - ScheduledExecutorService executor, - PeriodicActionParams periodicActionParams, - ShutdownWaitTimeParams shutdownWaitTimeParams, - SearchStatsReceiver searchStatsReceiver, - SearchCounter iterationCounter, - CriticalExceptionHandler criticalExceptionHandler, - Clock clock) { - super(executor, shutdownWaitTimeParams, searchStatsReceiver, iterationCounter, - criticalExceptionHandler, clock); - - this.periodicActionParams = periodicActionParams; - this.scheduledTask = new ScheduledExecutorTask(getIterationCounter(), clock) { - @Override - protected void runOneIteration() { - OneTaskScheduledExecutorManager.this.runOneIteration(); - } - }; - } - - /** - * Schedule the single internally specified task returned by getScheduledTask. - */ - public ScheduledFuture schedule() { - return this.scheduleNewTask( - this.getScheduledTask(), - this.periodicActionParams - ); - } - - /** - * The code that the task executes. - */ - protected abstract void runOneIteration(); - - public ScheduledExecutorTask getScheduledTask() { - return scheduledTask; - } - - @Override - public void close() throws IOException { - try { - shutdown(); - } catch (InterruptedException e) { - throw new IOException(e); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/util/ParallelUtil.docx b/src/java/com/twitter/search/earlybird/util/ParallelUtil.docx new file mode 100644 index 000000000..0c721457b Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ParallelUtil.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ParallelUtil.java b/src/java/com/twitter/search/earlybird/util/ParallelUtil.java deleted file mode 100644 index 9e570b1d9..000000000 --- a/src/java/com/twitter/search/earlybird/util/ParallelUtil.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadFactory; -import java.util.stream.Collectors; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; - -import com.twitter.util.Await; -import com.twitter.util.Future; -import com.twitter.util.Future$; -import com.twitter.util.FuturePool; -import com.twitter.util.FuturePool$; - -public final class ParallelUtil { - private ParallelUtil() { - } - - public static List parmap(String threadName, CheckedFunction fn, List input) - throws Exception { - return parmap(threadName, input.size(), fn, input); - } - - /** - * Runs a function in parallel across the elements of the list, and throws an exception if any - * of the functions throws, or returns the results. - * - * Uses as many threads as there are elements in the input, so only use this for tasks that - * require significant CPU for each element, and have less elements than the number of cores. - */ - public static List parmap( - String threadName, int threadPoolSize, CheckedFunction fn, List input) - throws Exception { - ExecutorService executor = Executors.newFixedThreadPool(threadPoolSize, - buildThreadFactory(threadName)); - FuturePool futurePool = FuturePool$.MODULE$.apply(executor); - - List> futures = input - .stream() - .map(in -> futurePool.apply(() -> { - try { - return fn.apply(in); - } catch (Exception e) { - throw new RuntimeException(e); - } - })).collect(Collectors.toList()); - - try { - return Await.result(Future$.MODULE$.collect(futures)); - } finally { - executor.shutdownNow(); - } - } - - private static ThreadFactory buildThreadFactory(String threadNameFormat) { - return new ThreadFactoryBuilder() - .setNameFormat(threadNameFormat) - .setDaemon(false) - .build(); - } - - @FunctionalInterface - public interface CheckedFunction { - /** - * A function from T to R that throws checked Exceptions. - */ - R apply(T t) throws Exception; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/PeriodicActionParams.docx b/src/java/com/twitter/search/earlybird/util/PeriodicActionParams.docx new file mode 100644 index 000000000..7911aa7d1 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/PeriodicActionParams.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/PeriodicActionParams.java b/src/java/com/twitter/search/earlybird/util/PeriodicActionParams.java deleted file mode 100644 index b2f148b4b..000000000 --- a/src/java/com/twitter/search/earlybird/util/PeriodicActionParams.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.concurrent.TimeUnit; - -/** - * Specifies timing and type of period actions that we schedule. - * - * See: - * https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/ScheduledExecutorService.html - */ -public final class PeriodicActionParams { - private enum DelayType { - FIXED_DELAY, - FIXED_RATE - } - - private long initialDelayDuration; - private long intervalDuration; - private TimeUnit intervalUnit; - private DelayType delayType; - - public long getInitialDelayDuration() { - return initialDelayDuration; - } - - public long getIntervalDuration() { - return intervalDuration; - } - - public TimeUnit getIntervalUnit() { - return intervalUnit; - } - - public DelayType getDelayType() { - return delayType; - } - - private PeriodicActionParams( - DelayType delayType, - long initialDelayDuration, - long intervalDuration, - TimeUnit intervalUnit) { - this.delayType = delayType; - this.intervalDuration = intervalDuration; - this.initialDelayDuration = initialDelayDuration; - this.intervalUnit = intervalUnit; - } - - // Runs start at times start, start+X, start+2*X etc., so they can possibly overlap. - public static PeriodicActionParams atFixedRate( - long intervalDuration, - TimeUnit intervalUnit) { - return new PeriodicActionParams(DelayType.FIXED_RATE, 0, - intervalDuration, intervalUnit); - } - - // Delay between every run. - // The order of what happens is: - // initial delay, run task, wait X time, run task, wait X time, etc. - // Runs can't overlap. - public static PeriodicActionParams withIntialWaitAndFixedDelay( - long initialDelayDuration, - long intervalDuration, - TimeUnit intervalUnit) { - return new PeriodicActionParams(DelayType.FIXED_DELAY, initialDelayDuration, - intervalDuration, intervalUnit); - } - - // Delay between every run. - public static PeriodicActionParams withFixedDelay( - long intervalDuration, - TimeUnit intervalUnit) { - return withIntialWaitAndFixedDelay(0, intervalDuration, intervalUnit); - } - - boolean isFixedDelay() { - return this.delayType == DelayType.FIXED_DELAY; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/ScheduledExecutorManager.docx b/src/java/com/twitter/search/earlybird/util/ScheduledExecutorManager.docx new file mode 100644 index 000000000..b684c63ad Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ScheduledExecutorManager.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ScheduledExecutorManager.java b/src/java/com/twitter/search/earlybird/util/ScheduledExecutorManager.java deleted file mode 100644 index 54f39c1ac..000000000 --- a/src/java/com/twitter/search/earlybird/util/ScheduledExecutorManager.java +++ /dev/null @@ -1,150 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchStatsReceiver; -import com.twitter.search.earlybird.exception.CriticalExceptionHandler; - -/** - * Base class for classes that run periodic tasks. - */ -public abstract class ScheduledExecutorManager { - private static final Logger LOG = LoggerFactory.getLogger(ScheduledExecutorManager.class); - private static final long SHUTDOWN_WAIT_INTERVAL_SEC = 30; - - public static final String SCHEDULED_EXECUTOR_TASK_PREFIX = "scheduled_executor_task_"; - - private final String name; - private final ScheduledExecutorService executor; - - private final ShutdownWaitTimeParams shutdownWaitTimeParams; - - private final SearchCounter iterationCounter; - private final SearchStatsReceiver searchStatsReceiver; - - protected final CriticalExceptionHandler criticalExceptionHandler; - private final Clock clock; - - protected boolean shouldLog = true; - - public ScheduledExecutorManager( - ScheduledExecutorService executor, - ShutdownWaitTimeParams shutdownWaitTimeParams, - SearchStatsReceiver searchStatsReceiver, - CriticalExceptionHandler criticalExceptionHandler, - Clock clock) { - this(executor, shutdownWaitTimeParams, searchStatsReceiver, null, - criticalExceptionHandler, clock); - } - - ScheduledExecutorManager( - ScheduledExecutorService executor, - ShutdownWaitTimeParams shutdownWaitTimeParams, - SearchStatsReceiver searchStatsReceiver, - SearchCounter iterationCounter, - CriticalExceptionHandler criticalExceptionHandler, - Clock clock) { - this.name = getClass().getSimpleName(); - this.executor = executor; - this.criticalExceptionHandler = criticalExceptionHandler; - this.shutdownWaitTimeParams = shutdownWaitTimeParams; - - if (iterationCounter != null) { - this.iterationCounter = iterationCounter; - } else { - this.iterationCounter = searchStatsReceiver.getCounter(SCHEDULED_EXECUTOR_TASK_PREFIX + name); - } - - this.searchStatsReceiver = searchStatsReceiver; - this.clock = clock; - } - - /** - * Schedule a task. - */ - protected final ScheduledFuture scheduleNewTask( - ScheduledExecutorTask task, - PeriodicActionParams periodicActionParams) { - long interval = periodicActionParams.getIntervalDuration(); - TimeUnit timeUnit = periodicActionParams.getIntervalUnit(); - long initialDelay = periodicActionParams.getInitialDelayDuration(); - - if (interval <= 0) { - String message = String.format( - "Not scheduling manager %s for wrong interval %d %s", name, interval, timeUnit); - LOG.error(message); - throw new UnsupportedOperationException(message); - } - - if (shouldLog) { - LOG.info("Scheduling to run {} every {} {} with {}", name, interval, timeUnit, - periodicActionParams.getDelayType()); - } - final ScheduledFuture scheduledFuture; - if (periodicActionParams.isFixedDelay()) { - scheduledFuture = executor.scheduleWithFixedDelay(task, initialDelay, interval, timeUnit); - } else { - scheduledFuture = executor.scheduleAtFixedRate(task, initialDelay, interval, timeUnit); - } - return scheduledFuture; - } - - /** - * Shutdown everything that's running with the executor. - */ - public boolean shutdown() throws InterruptedException { - LOG.info("Start shutting down {}.", name); - executor.shutdownNow(); - - boolean terminated = false; - long waitSeconds = shutdownWaitTimeParams.getWaitUnit().toSeconds( - shutdownWaitTimeParams.getWaitDuration() - ); - - if (waitSeconds == 0) { - LOG.info("Not waiting at all for {}, wait time is set to zero.", name); - } else { - while (!terminated && waitSeconds > 0) { - long waitTime = Math.min(waitSeconds, SHUTDOWN_WAIT_INTERVAL_SEC); - terminated = executor.awaitTermination(waitTime, TimeUnit.SECONDS); - waitSeconds -= waitTime; - - if (!terminated) { - LOG.info("Still shutting down {} ...", name); - } - } - } - - LOG.info("Done shutting down {}, terminated: {}", name, terminated); - - shutdownComponent(); - return terminated; - } - - protected ScheduledExecutorService getExecutor() { - return executor; - } - - public final String getName() { - return name; - } - - public SearchCounter getIterationCounter() { - return iterationCounter; - } - - protected final SearchStatsReceiver getSearchStatsReceiver() { - return searchStatsReceiver; - } - - // Override if you need to shutdown additional services. - protected void shutdownComponent() { - } -} diff --git a/src/java/com/twitter/search/earlybird/util/ScheduledExecutorTask.docx b/src/java/com/twitter/search/earlybird/util/ScheduledExecutorTask.docx new file mode 100644 index 000000000..abfcf9994 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ScheduledExecutorTask.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ScheduledExecutorTask.java b/src/java/com/twitter/search/earlybird/util/ScheduledExecutorTask.java deleted file mode 100644 index a6cd074c0..000000000 --- a/src/java/com/twitter/search/earlybird/util/ScheduledExecutorTask.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.twitter.search.earlybird.util; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.metrics.SearchCounter; - -public abstract class ScheduledExecutorTask implements Runnable { - private final SearchCounter counter; - protected final Clock clock; - - public ScheduledExecutorTask(SearchCounter counter, Clock clock) { - Preconditions.checkNotNull(counter); - this.counter = counter; - this.clock = clock; - } - - @Override - public final void run() { - counter.increment(); - runOneIteration(); - } - - @VisibleForTesting - protected abstract void runOneIteration(); -} diff --git a/src/java/com/twitter/search/earlybird/util/ScrubGenUtil.docx b/src/java/com/twitter/search/earlybird/util/ScrubGenUtil.docx new file mode 100644 index 000000000..5ff73fc9a Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ScrubGenUtil.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ScrubGenUtil.java b/src/java/com/twitter/search/earlybird/util/ScrubGenUtil.java deleted file mode 100644 index f2ba966d3..000000000 --- a/src/java/com/twitter/search/earlybird/util/ScrubGenUtil.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.text.ParseException; -import java.util.Date; - -import org.apache.commons.lang3.time.FastDateFormat; - -public final class ScrubGenUtil { - public static final FastDateFormat SCRUB_GEN_DATE_FORMAT = FastDateFormat.getInstance("yyyyMMdd"); - - private ScrubGenUtil() { } - - /** - * Helper method to parse a scrub gen from String to date - * - * @param scrubGen - * @return scrubGen in Date type - */ - public static Date parseScrubGenToDate(String scrubGen) { - try { - return SCRUB_GEN_DATE_FORMAT.parse(scrubGen); - } catch (ParseException e) { - String msg = "Malformed scrub gen date: " + scrubGen; - // If we are running a scrub gen and the date is bad we should quit and not continue. - throw new RuntimeException(msg, e); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/util/ShutdownWaitTimeParams.docx b/src/java/com/twitter/search/earlybird/util/ShutdownWaitTimeParams.docx new file mode 100644 index 000000000..06dc5f5f5 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ShutdownWaitTimeParams.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ShutdownWaitTimeParams.java b/src/java/com/twitter/search/earlybird/util/ShutdownWaitTimeParams.java deleted file mode 100644 index ec056e93a..000000000 --- a/src/java/com/twitter/search/earlybird/util/ShutdownWaitTimeParams.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.concurrent.TimeUnit; - -/** - * Specifies how much time do we wait when shutting down a task. - */ -public class ShutdownWaitTimeParams { - private long waitDuration; - private TimeUnit waitUnit; - - public ShutdownWaitTimeParams(long waitDuration, TimeUnit waitUnit) { - this.waitDuration = waitDuration; - this.waitUnit = waitUnit; - } - - public long getWaitDuration() { - return waitDuration; - } - - public TimeUnit getWaitUnit() { - return waitUnit; - } - - /** - * Returns a ShutdownWaitTimeParams instance that instructs the caller to wait indefinitely for - * the task to shut down. - */ - public static ShutdownWaitTimeParams indefinitely() { - return new ShutdownWaitTimeParams(Long.MAX_VALUE, TimeUnit.DAYS); - } - - /** - * Returns a ShutdownWaitTimeParams instance that instructs the caller to shut down the task - * immediately. - */ - public static ShutdownWaitTimeParams immediately() { - return new ShutdownWaitTimeParams(0, TimeUnit.MILLISECONDS); - } -} diff --git a/src/java/com/twitter/search/earlybird/util/TermCountMonitor.docx b/src/java/com/twitter/search/earlybird/util/TermCountMonitor.docx new file mode 100644 index 000000000..222870d95 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/TermCountMonitor.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/TermCountMonitor.java b/src/java/com/twitter/search/earlybird/util/TermCountMonitor.java deleted file mode 100644 index 55d747754..000000000 --- a/src/java/com/twitter/search/earlybird/util/TermCountMonitor.java +++ /dev/null @@ -1,338 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; -import java.util.stream.Collectors; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -import org.apache.commons.lang.mutable.MutableLong; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.Terms; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.concurrent.ScheduledExecutorServiceFactory; -import com.twitter.search.common.metrics.SearchLongGauge; -import com.twitter.search.common.metrics.SearchStatsReceiver; -import com.twitter.search.common.metrics.SearchTimer; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.exception.CriticalExceptionHandler; -import com.twitter.search.earlybird.index.EarlybirdSingleSegmentSearcher; -import com.twitter.search.earlybird.partition.SegmentInfo; -import com.twitter.search.earlybird.partition.SegmentManager; - -/** - * A background task that periodically gets and exports the number of terms per field that are - * indexed on this earlybird, averaged over all segments. - * Specifically used for making sure that we are not missing terms for any fields in the search - * archives. - * The task loops though all the segments that are indexed by this earlybird, and for each segment - * looks at the term counts for all fields in that segment. - * - * Also keeps track of the number of fields that do not have any term counts (or below the specified - * threshold) in the data that is indexed on this earlybird. - */ -public class TermCountMonitor extends OneTaskScheduledExecutorManager { - private static final Logger LOG = LoggerFactory.getLogger(TermCountMonitor.class); - - private static final String THREAD_NAME_FORMAT = "TermCountMonitor-%d"; - private static final boolean THREAD_IS_DAEMON = true; - - public static final String RUN_INTERVAL_MINUTES_CONFIG_NAME = - "term_count_monitor_run_interval_minutes"; - - private static Function termStatNameFunc = - field -> "term_count_on_field_" + field; - private static Function tokenStatNameFunc = - field -> "token_count_on_field_" + field; - private static Function missingFieldStatNameFunc = - field -> "term_count_monitor_missing_field_" + field; - - private static class RawFieldCounter { - private MutableLong numTerms = new MutableLong(0L); - private MutableLong numTokens = new MutableLong(0L); - } - - @VisibleForTesting - static class ExportedFieldCounter { - private final AtomicLong numTerms; - private final AtomicLong numTokens; - - ExportedFieldCounter(RawFieldCounter rawCounter) { - this.numTerms = new AtomicLong(rawCounter.numTerms.longValue()); - this.numTokens = new AtomicLong(rawCounter.numTokens.longValue()); - } - - ExportedFieldCounter(long numInitialTerms, long numInitialTokens) { - this.numTerms = new AtomicLong(numInitialTerms); - this.numTokens = new AtomicLong(numInitialTokens); - } - - @VisibleForTesting - long getNumTerms() { - return numTerms.longValue(); - } - - @VisibleForTesting - long getNumTokens() { - return numTokens.longValue(); - } - } - - private final int fieldMinTermCount = - EarlybirdConfig.getInt("term_count_monitor_min_count", 0); - - private final SegmentManager segmentManager; - private final Map missingFields; - private final Map termStats; - private final Map tokenStats; - private final Map exportedCounts; - private final SearchLongGauge termCountOnAllFields; - private final SearchLongGauge tokenCountOnAllFields; - private final SearchLongGauge fieldsWithNoTermCountStat; - private final SearchLongGauge isRunningStat; - private final SearchTimerStats checkTimeStat; - - @Override - protected void runOneIteration() { - LOG.info("Starting to get per-field term counts"); - isRunningStat.set(1); - final SearchTimer timer = checkTimeStat.startNewTimer(); - try { - updateFieldTermCounts(); - } catch (Exception ex) { - LOG.error("Unexpected exception while getting per-field term counts", ex); - } finally { - LOG.info( - "Done getting per-field term counts. Fields with low term counts: {}", - getFieldsWithLowTermCount()); - isRunningStat.set(0); - checkTimeStat.stopTimerAndIncrement(timer); - } - } - - /** - * Create a term count monitor which monitors the number of terms in segments - * managed by the given segment manager. - */ - public TermCountMonitor( - SegmentManager segmentManager, - ScheduledExecutorServiceFactory executorServiceFactory, - long shutdownWaitDuration, - TimeUnit shutdownWaitUnit, - SearchStatsReceiver searchStatsReceiver, - CriticalExceptionHandler criticalExceptionHandler) { - super( - executorServiceFactory, - THREAD_NAME_FORMAT, - THREAD_IS_DAEMON, - PeriodicActionParams.atFixedRate( - EarlybirdConfig.getInt(RUN_INTERVAL_MINUTES_CONFIG_NAME, -1), - TimeUnit.MINUTES), - new ShutdownWaitTimeParams( - shutdownWaitDuration, - shutdownWaitUnit - ), - searchStatsReceiver, - criticalExceptionHandler); - this.segmentManager = segmentManager; - this.missingFields = new HashMap<>(); - this.termStats = new HashMap<>(); - this.tokenStats = new HashMap<>(); - this.exportedCounts = new HashMap<>(); - this.termCountOnAllFields = getSearchStatsReceiver().getLongGauge("term_count_on_all_fields"); - this.tokenCountOnAllFields = getSearchStatsReceiver().getLongGauge("token_count_on_all_fields"); - this.fieldsWithNoTermCountStat = - getSearchStatsReceiver().getLongGauge("fields_with_low_term_counts"); - this.isRunningStat = - getSearchStatsReceiver().getLongGauge("term_count_monitor_is_running"); - this.checkTimeStat = - getSearchStatsReceiver().getTimerStats( - "term_count_monitor_check_time", TimeUnit.MILLISECONDS, true, true, false); - } - - private SearchLongGauge getOrCreateLongGauge( - Map gauges, String field, Function nameSupplier) { - SearchLongGauge stat = gauges.get(field); - - if (stat == null) { - stat = getSearchStatsReceiver().getLongGauge(nameSupplier.apply(field)); - gauges.put(field, stat); - } - - return stat; - } - - private void updateFieldTermCounts() { - // 0. Get the current per-field term counts - Map newCounts = getFieldStats(); - LOG.info("Computed field stats for all segments"); - - // 1. Update all existing keys - for (Map.Entry exportedCount : exportedCounts.entrySet()) { - String field = exportedCount.getKey(); - ExportedFieldCounter exportedCountValue = exportedCount.getValue(); - - RawFieldCounter newCount = newCounts.get(field); - if (newCount == null) { - exportedCountValue.numTerms.set(0L); - exportedCountValue.numTokens.set(0L); - } else { - exportedCountValue.numTerms.set(newCount.numTerms.longValue()); - exportedCountValue.numTokens.set(newCount.numTokens.longValue()); - - // clean up so that we don't check this field again when we look for new field - newCounts.remove(field); - } - } - - // 2. Add and export all new fields' term counts - for (Map.Entry newCount: newCounts.entrySet()) { - String field = newCount.getKey(); - Preconditions.checkState(!exportedCounts.containsKey(field), - "Should have already processed and removed existing fields: " + field); - - ExportedFieldCounter newStat = new ExportedFieldCounter(newCount.getValue()); - exportedCounts.put(field, newStat); - } - - // 3. Export as a stat the term counts for all the known fields. - for (Map.Entry exportedCount : exportedCounts.entrySet()) { - String field = exportedCount.getKey(); - ExportedFieldCounter counter = exportedCount.getValue(); - - getOrCreateLongGauge(termStats, field, termStatNameFunc).set(counter.numTerms.get()); - getOrCreateLongGauge(tokenStats, field, tokenStatNameFunc).set(counter.numTokens.get()); - } - - // 4. Export as a stat, number of fields not having enough term counts (i.e. <= 0) - int fieldsWithNoTermCounts = 0; - for (Map.Entry fieldTermCount : exportedCounts.entrySet()) { - String field = fieldTermCount.getKey(); - AtomicLong exportedCountValue = fieldTermCount.getValue().numTerms; - if (exportedCountValue.get() <= fieldMinTermCount) { - LOG.warn( - "Found a field with too few term counts. Field: {} count: {}", - field, exportedCountValue); - fieldsWithNoTermCounts++; - } - } - this.fieldsWithNoTermCountStat.set(fieldsWithNoTermCounts); - } - - /** - * Loops through all segments, and for each field gets the average term/token count. - * Based on that, returns a map from each field to its term/token count (average per segment). - */ - private Map getFieldStats() { - Iterable segmentInfos = segmentManager.getSegmentInfos( - SegmentManager.Filter.Enabled, SegmentManager.Order.NEW_TO_OLD); - Map rawCounts = new HashMap<>(); - - ImmutableSchemaInterface schemaSnapshot = - segmentManager.getEarlybirdIndexConfig().getSchema().getSchemaSnapshot(); - Set missingFieldsCandidates = schemaSnapshot - .getFieldInfos() - .stream() - .filter(fieldInfo -> fieldInfo.getFieldType().indexOptions() != IndexOptions.NONE) - .map(Schema.FieldInfo::getName) - .collect(Collectors.toSet()); - int segmentCount = 0; - for (SegmentInfo segmentInfo : segmentInfos) { - segmentCount++; - try { - EarlybirdSingleSegmentSearcher searcher = segmentManager.getSearcher( - segmentInfo.getTimeSliceID(), schemaSnapshot); - if (searcher != null) { - EarlybirdIndexSegmentAtomicReader reader = searcher.getTwitterIndexReader(); - for (Schema.FieldInfo fieldInfo : schemaSnapshot.getFieldInfos()) { - if (fieldInfo.getFieldType().indexOptions() == IndexOptions.NONE) { - continue; - } - - String fieldName = fieldInfo.getName(); - RawFieldCounter count = rawCounts.get(fieldName); - if (count == null) { - count = new RawFieldCounter(); - rawCounts.put(fieldName, count); - } - Terms terms = reader.terms(fieldName); - if (terms != null) { - missingFieldsCandidates.remove(fieldName); - count.numTerms.add(terms.size()); - long sumTotalTermFreq = terms.getSumTotalTermFreq(); - if (sumTotalTermFreq != -1) { - count.numTokens.add(sumTotalTermFreq); - } - } - } - } - } catch (Exception e) { - LOG.error("Exception getting average term count per field: " + segmentInfo, e); - } - } - - // Update missing fields stats. - missingFieldsCandidates.forEach( - field -> getOrCreateLongGauge(missingFields, field, missingFieldStatNameFunc).set(1)); - missingFields.keySet().stream() - .filter( - field -> !missingFieldsCandidates.contains(field)) - .forEach( - field -> getOrCreateLongGauge(missingFields, field, missingFieldStatNameFunc).set(0)); - - long totalTermCount = 0; - long totalTokenCount = 0; - if (segmentCount == 0) { - LOG.error("No segments are found to calculate per-field term counts."); - } else { - LOG.debug("TermCountMonitor.getPerFieldTermCount.segmentCount = {}", segmentCount); - LOG.debug(" field: term count (average per segment)"); - for (Map.Entry entry : rawCounts.entrySet()) { - String field = entry.getKey(); - final long averageTermCount = entry.getValue().numTerms.longValue() / segmentCount; - final long averageTokenCount = entry.getValue().numTokens.longValue() / segmentCount; - totalTermCount += entry.getValue().numTerms.longValue(); - totalTokenCount += entry.getValue().numTokens.longValue(); - - LOG.debug(" '{} term': {}", field, averageTermCount); - LOG.debug(" '{} token': {}", field, averageTokenCount); - - entry.getValue().numTerms.setValue(averageTermCount); - entry.getValue().numTokens.setValue(averageTokenCount); - } - } - LOG.info("Total term count: {}", totalTermCount); - LOG.info("Total token count: {}", totalTokenCount); - this.termCountOnAllFields.set(totalTermCount); - this.tokenCountOnAllFields.set(totalTokenCount); - - return rawCounts; - } - - @VisibleForTesting - Map getExportedCounts() { - return Collections.unmodifiableMap(this.exportedCounts); - } - - @VisibleForTesting - long getFieldsWithLowTermCount() { - return fieldsWithNoTermCountStat.get(); - } - - @VisibleForTesting - Map getMissingFields() { - return missingFields; - } -} diff --git a/src/java/com/twitter/search/earlybird/util/TweetCountMonitor.docx b/src/java/com/twitter/search/earlybird/util/TweetCountMonitor.docx new file mode 100644 index 000000000..30a987c56 Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/TweetCountMonitor.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/TweetCountMonitor.java b/src/java/com/twitter/search/earlybird/util/TweetCountMonitor.java deleted file mode 100644 index e33433656..000000000 --- a/src/java/com/twitter/search/earlybird/util/TweetCountMonitor.java +++ /dev/null @@ -1,447 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Maps; - -import org.apache.commons.lang.mutable.MutableInt; -import org.apache.commons.lang.mutable.MutableLong; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.common.collections.Pair; -import com.twitter.search.common.concurrent.ScheduledExecutorServiceFactory; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchLongGauge; -import com.twitter.search.common.metrics.SearchStatsReceiver; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.common.partitioning.base.Segment; -import com.twitter.search.common.schema.base.ImmutableSchemaInterface; -import com.twitter.search.common.schema.base.Schema; -import com.twitter.search.core.earlybird.index.DocIDToTweetIDMapper; -import com.twitter.search.core.earlybird.index.EarlybirdIndexSegmentAtomicReader; -import com.twitter.search.core.earlybird.index.TimeMapper; -import com.twitter.search.earlybird.common.config.EarlybirdConfig; -import com.twitter.search.earlybird.exception.CriticalExceptionHandler; -import com.twitter.search.earlybird.index.EarlybirdSingleSegmentSearcher; -import com.twitter.search.earlybird.partition.SegmentInfo; -import com.twitter.search.earlybird.partition.SegmentManager; - -/** - * A background task that periodically gets and exports the number of tweets per hour that are - * indexed on this earlybird. - * Specifically used for making sure that we are not missing data for any hours in the search - * archives. - * The task loops though all the segments that are indexed by this earlybird, and for each segment - * looks at all the createdAt dates for all of the documents in that segment. - * - * Also keeps track off an exposes as a stat the number of hours that do not have any tweets in the - * min/max range of data that IS indexed on this earlybird. i.e if we only have data for - * 2006/01/01:02 and 2006/01/01:04, it will consider 2006/01/01:03 as a missing hour. - * Hours before 2006/01/01:02 or after 2006/01/01:04 will not be considered as missing. - */ -public class TweetCountMonitor extends OneTaskScheduledExecutorManager { - private static final Logger LOG = LoggerFactory.getLogger(TweetCountMonitor.class); - - private static final String THREAD_NAME_FORMAT = "TweetCountMonitor-%d"; - private static final boolean THREAD_IS_DAEMON = true; - - public static final String RUN_INTERVAL_MINUTES_CONFIG_NAME = - "tweet_count_monitor_run_interval_minutes"; - public static final String START_CHECK_HOUR_CONFIG_NAME = - "tweet_count_monitor_start_check_hour"; - public static final String HOURLY_MIN_COUNT_CONFIG_NAME = - "tweet_count_monitor_hourly_min_count"; - public static final String DAILY_MIN_COUNT_CONFIG_NAME = - "tweet_count_monitor_daily_min_count"; - - @VisibleForTesting - public static final AtomicInteger INSTANCE_COUNTER = new AtomicInteger(0); - - private static final long MILLIS_IN_A_DAY = TimeUnit.DAYS.toMillis(1); - - private final SegmentManager segmentManager; - - private final SearchStatsReceiver searchStatsReceiver; - private final int instanceCounter; - - // The first date in format "YYYYMMDDHH" that we want to check counts for. - private final int startCheckHour; - // The last date in format "YYYYMMDDHH" that we want to check counts for. - private final int endCheckHour; - //Smallest number of docs we expect to have for each day. - private final int dailyMinCount; - // Smallest number of docs we expect to have for each hour. - private final int hourlyMinCount; - // Binary stat, set to 0 when the monitor is running - private final SearchLongGauge isRunningStat; - // How long each iteration takes - private final SearchTimerStats checkTimeStat; - - private final Map fieldTermCounters; - private final Map fieldCheckTimeStats; - - /** - * Create a TweetCountMonitor to monitor all segments in the given segmentManager - */ - public TweetCountMonitor( - SegmentManager segmentManager, - ScheduledExecutorServiceFactory executorServiceFactory, - long shutdownWaitDuration, - TimeUnit shutdownWaitUnit, - SearchStatsReceiver searchStatsReceiver, - CriticalExceptionHandler criticalExceptionHandler) { - this(segmentManager, - EarlybirdConfig.getInt(START_CHECK_HOUR_CONFIG_NAME, 0), - EarlybirdConfig.getInt(RUN_INTERVAL_MINUTES_CONFIG_NAME, -1), - EarlybirdConfig.getInt(HOURLY_MIN_COUNT_CONFIG_NAME, 0), - EarlybirdConfig.getInt(DAILY_MIN_COUNT_CONFIG_NAME, 0), - executorServiceFactory, - shutdownWaitDuration, - shutdownWaitUnit, - searchStatsReceiver, - criticalExceptionHandler); - } - - @VisibleForTesting - TweetCountMonitor( - SegmentManager segmentManager, - int startCheckHourFromConfig, - int schedulePeriodMinutes, - int hourlyMinCount, - int dailyMinCount, - ScheduledExecutorServiceFactory executorServiceFactory, - long shutdownWaitDuration, - TimeUnit shutdownWaitUnit, - SearchStatsReceiver searchStatsReceiver, - CriticalExceptionHandler criticalExceptionHandler) { - super( - executorServiceFactory, - THREAD_NAME_FORMAT, - THREAD_IS_DAEMON, - PeriodicActionParams.atFixedRate( - schedulePeriodMinutes, - TimeUnit.MINUTES - ), - new ShutdownWaitTimeParams( - shutdownWaitDuration, - shutdownWaitUnit - ), - searchStatsReceiver, - criticalExceptionHandler); - this.segmentManager = segmentManager; - this.searchStatsReceiver = searchStatsReceiver; - this.instanceCounter = INSTANCE_COUNTER.incrementAndGet(); - this.hourlyMinCount = hourlyMinCount; - this.dailyMinCount = dailyMinCount; - - String isRunningStatName = "tweet_count_monitor_is_running_v_" + this.instanceCounter; - this.isRunningStat = SearchLongGauge.export(isRunningStatName); - String checkTimeStatName = "tweet_count_monitor_check_time_v_" + this.instanceCounter; - this.checkTimeStat = SearchTimerStats.export(checkTimeStatName, TimeUnit.MILLISECONDS, true); - - this.startCheckHour = Math.max( - startCheckHourFromConfig, - dateToHourValue(segmentManager.getPartitionConfig().getTierStartDate())); - this.endCheckHour = dateToHourValue(segmentManager.getPartitionConfig().getTierEndDate()); - - this.fieldTermCounters = Maps.newHashMap(); - this.fieldTermCounters.put( - FieldTermCounter.TWEET_COUNT_KEY, - new FieldTermCounter( - FieldTermCounter.TWEET_COUNT_KEY, - instanceCounter, - startCheckHour, - endCheckHour, - hourlyMinCount, - dailyMinCount)); - this.fieldCheckTimeStats = Maps.newHashMap(); - } - - private int dateToHourValue(Date date) { - Calendar cal = Calendar.getInstance(FieldTermCounter.TIME_ZONE); - cal.setTime(date); - return FieldTermCounter.getHourValue(cal); - } - - private void updateHourlyCounts() { - // Iterate the current index to count all tweets anf field hits. - Map> newCountMap = getNewTweetCountMap(); - - for (Map.Entry> newCounts : newCountMap.entrySet()) { - final String fieldName = newCounts.getKey(); - FieldTermCounter termCounter = fieldTermCounters.get(fieldName); - if (termCounter == null) { - termCounter = new FieldTermCounter( - fieldName, - instanceCounter, - startCheckHour, - endCheckHour, - hourlyMinCount, - dailyMinCount); - fieldTermCounters.put(fieldName, termCounter); - } - termCounter.runWithNewCounts(newCounts.getValue()); - } - } - - /** - * Loops through all segments, and all documents in each segment, and for each document - * gets the createdAt timestamp (in seconds) from the TimeMapper. - * Based on that, returns a map with the count of: - * . the number of tweets for each hour - * . the number of tweets corresponding to each field for each hour - */ - private Map> getNewTweetCountMap() { - Iterable segmentInfos = segmentManager.getSegmentInfos( - SegmentManager.Filter.Enabled, SegmentManager.Order.NEW_TO_OLD); - Map> newCountMap = Maps.newHashMap(); - - Map newCounts = Maps.newHashMap(); - newCountMap.put(FieldTermCounter.TWEET_COUNT_KEY, newCounts); - - ImmutableSchemaInterface schemaSnapshot = - segmentManager.getEarlybirdIndexConfig().getSchema().getSchemaSnapshot(); - Calendar cal = Calendar.getInstance(FieldTermCounter.TIME_ZONE); - for (SegmentInfo segmentInfo : segmentInfos) { - try { - EarlybirdSingleSegmentSearcher searcher = segmentManager.getSearcher( - segmentInfo.getTimeSliceID(), schemaSnapshot); - if (searcher != null) { - EarlybirdIndexSegmentAtomicReader reader = searcher.getTwitterIndexReader(); - TimeMapper timeMapper = reader.getSegmentData().getTimeMapper(); - List> outsideEndDateRangeDocList = new ArrayList<>(); - - // Get the number of tweets for each hour. - int docsOutsideEndDateRange = getNewTweetCountsForSegment( - segmentInfo, reader, timeMapper, cal, newCounts); - if (docsOutsideEndDateRange > 0) { - outsideEndDateRangeDocList.add(new Pair<>( - FieldTermCounter.TWEET_COUNT_KEY, docsOutsideEndDateRange)); - } - - // Get the number of tweets with corresponding field for each hour. - for (Schema.FieldInfo fieldInfo : schemaSnapshot.getFieldInfos()) { - if (fieldInfo.getFieldType().indexOptions() == IndexOptions.NONE) { - continue; - } - - String fieldName = fieldInfo.getName(); - docsOutsideEndDateRange = getNewFieldTweetCountsForSegment( - segmentInfo, reader, timeMapper, cal, fieldName, newCountMap); - if (docsOutsideEndDateRange > 0) { - outsideEndDateRangeDocList.add(new Pair<>(fieldName, docsOutsideEndDateRange)); - } - } - - LOG.info("Inspected segment: " + segmentInfo + " found " - + outsideEndDateRangeDocList.size() - + " fields with documents outside of segment end date."); - for (Pair outsideEndRange : outsideEndDateRangeDocList) { - LOG.info(" outside end date range - segment: " + segmentInfo.getSegmentName() - + " field: " + outsideEndRange.toString()); - } - } - } catch (IOException e) { - LOG.error("Exception getting daily tweet counts for timeslice: " + segmentInfo, e); - } - } - return newCountMap; - } - - private void incrementNumDocsWithIllegalTimeCounter(String segmentName, String fieldSuffix) { - String statName = String.format( - "num_docs_with_illegal_time_for_segment_%s%s_counter", segmentName, fieldSuffix); - SearchCounter counter = SearchCounter.export(statName); - counter.increment(); - } - - private int getNewTweetCountsForSegment( - SegmentInfo segmentInfo, - EarlybirdIndexSegmentAtomicReader reader, - TimeMapper timeMapper, - Calendar cal, - Map newTweetCounts) { - DocIDToTweetIDMapper tweetIdMapper = reader.getSegmentData().getDocIDToTweetIDMapper(); - long dataEndTimeExclusiveMillis = getDataEndTimeExclusiveMillis(segmentInfo); - int docsOutsideEndDateRange = 0; - int docId = Integer.MIN_VALUE; - while ((docId = tweetIdMapper.getNextDocID(docId)) != DocIDToTweetIDMapper.ID_NOT_FOUND) { - UpdateCountType updateCountType = - updateTweetCount(timeMapper, docId, dataEndTimeExclusiveMillis, cal, newTweetCounts); - if (updateCountType == UpdateCountType.ILLEGAL_TIME) { - incrementNumDocsWithIllegalTimeCounter(segmentInfo.getSegmentName(), ""); - } else if (updateCountType == UpdateCountType.OUT_OF_RANGE_TIME) { - docsOutsideEndDateRange++; - } - } - return docsOutsideEndDateRange; - } - - private int getNewFieldTweetCountsForSegment( - SegmentInfo segmentInfo, - EarlybirdIndexSegmentAtomicReader reader, - TimeMapper timeMapper, - Calendar cal, - String field, - Map> newCountMap) throws IOException { - int docsOutsideEndDateRange = 0; - Map fieldTweetCounts = - newCountMap.computeIfAbsent(field, k -> Maps.newHashMap()); - - Terms terms = reader.terms(field); - if (terms == null) { - LOG.warn("Field <" + field + "> is missing terms in segment: " - + segmentInfo.getSegmentName()); - return 0; - } - long startTimeMillis = System.currentTimeMillis(); - - long dataEndTimeExclusiveMillis = getDataEndTimeExclusiveMillis(segmentInfo); - for (TermsEnum termsEnum = terms.iterator(); termsEnum.next() != null;) { - DocIdSetIterator docsIterator = termsEnum.postings(null, PostingsEnum.NONE); - for (int docId = docsIterator.nextDoc(); - docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsIterator.nextDoc()) { - UpdateCountType updateCountType = updateTweetCount( - timeMapper, docId, dataEndTimeExclusiveMillis, cal, fieldTweetCounts); - if (updateCountType == UpdateCountType.ILLEGAL_TIME) { - incrementNumDocsWithIllegalTimeCounter( - segmentInfo.getSegmentName(), "_and_field_" + field); - } else if (updateCountType == UpdateCountType.OUT_OF_RANGE_TIME) { - docsOutsideEndDateRange++; - } - } - } - updateFieldRunTimeStats(field, System.currentTimeMillis() - startTimeMillis); - - return docsOutsideEndDateRange; - } - - private enum UpdateCountType { - OK_TIME, - ILLEGAL_TIME, - OUT_OF_RANGE_TIME, - } - - private static UpdateCountType updateTweetCount( - TimeMapper timeMapper, - int docId, - long dataEndTimeExclusiveMillis, - Calendar cal, - Map newTweetCounts) { - int timeSecs = timeMapper.getTime(docId); - if (timeSecs == TimeMapper.ILLEGAL_TIME) { - return UpdateCountType.ILLEGAL_TIME; - } - if (dataEndTimeExclusiveMillis == Segment.NO_DATA_END_TIME - || timeSecs * 1000L < dataEndTimeExclusiveMillis) { - Integer hourlyValue = FieldTermCounter.getHourValue(cal, timeSecs); - MutableInt count = newTweetCounts.get(hourlyValue); - if (count == null) { - count = new MutableInt(0); - newTweetCounts.put(hourlyValue, count); - } - count.increment(); - return UpdateCountType.OK_TIME; - } else { - return UpdateCountType.OUT_OF_RANGE_TIME; - } - } - - /** - * If a segment has an end date, return the last timestamp (exclusive, and in millis) for which - * we expect it to have data. - * @return Segment.NO_DATA_END_TIME if the segment does not have an end date. - */ - private long getDataEndTimeExclusiveMillis(SegmentInfo segmentInfo) { - long dataEndDate = segmentInfo.getSegment().getDataEndDateInclusiveMillis(); - if (dataEndDate == Segment.NO_DATA_END_TIME) { - return Segment.NO_DATA_END_TIME; - } else { - return dataEndDate + MILLIS_IN_A_DAY; - } - } - - private void updateFieldRunTimeStats(String fieldName, long runTimeMs) { - SearchTimerStats timerStats = fieldCheckTimeStats.get(fieldName); - if (timerStats == null) { - final String statName = "tweet_count_monitor_check_time_field_" + fieldName; - timerStats = searchStatsReceiver.getTimerStats( - statName, TimeUnit.MILLISECONDS, false, false, false); - fieldCheckTimeStats.put(fieldName, timerStats); - } - timerStats.timerIncrement(runTimeMs); - } - - @VisibleForTesting - String getStatName(String fieldName, Integer date) { - return FieldTermCounter.getStatName(fieldName, instanceCounter, date); - } - - @VisibleForTesting - Map getExportedCounts(String fieldName) { - if (fieldTermCounters.get(fieldName) == null) { - return null; - } else { - return fieldTermCounters.get(fieldName).getExportedCounts(); - } - } - - @VisibleForTesting - Map getDailyCounts(String fieldName) { - if (fieldTermCounters.get(fieldName) == null) { - return null; - } else { - return fieldTermCounters.get(fieldName).getDailyCounts(); - } - } - - @VisibleForTesting - long getHoursWithNoTweets(String fieldName) { - return fieldTermCounters.get(fieldName).getHoursWithNoTweets(); - } - - @VisibleForTesting - long getDaysWithNoTweets(String fieldName) { - return fieldTermCounters.get(fieldName).getDaysWithNoTweets(); - } - - @VisibleForTesting - Map getExportedHourlyCountStats(String fieldName) { - return fieldTermCounters.get(fieldName).getExportedHourlyCountStats(); - } - - @Override - protected void runOneIteration() { - LOG.info("Starting to get hourly tweet counts"); - final long startTimeMillis = System.currentTimeMillis(); - - isRunningStat.set(1); - try { - updateHourlyCounts(); - } catch (Exception ex) { - LOG.error("Unexpected exception while getting hourly tweet counts", ex); - } finally { - isRunningStat.set(0); - - long elapsedTimeMillis = System.currentTimeMillis() - startTimeMillis; - checkTimeStat.timerIncrement(elapsedTimeMillis); - LOG.info("Done getting daily tweet counts. Hours without tweets: " - + getHoursWithNoTweets(FieldTermCounter.TWEET_COUNT_KEY)); - LOG.info("Updating tweet count takes " + (elapsedTimeMillis / 1000) + " secs."); - } - } -} diff --git a/src/java/com/twitter/search/earlybird/util/ViewerWriter.docx b/src/java/com/twitter/search/earlybird/util/ViewerWriter.docx new file mode 100644 index 000000000..8504af97d Binary files /dev/null and b/src/java/com/twitter/search/earlybird/util/ViewerWriter.docx differ diff --git a/src/java/com/twitter/search/earlybird/util/ViewerWriter.java b/src/java/com/twitter/search/earlybird/util/ViewerWriter.java deleted file mode 100644 index f6b02f1a5..000000000 --- a/src/java/com/twitter/search/earlybird/util/ViewerWriter.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.twitter.search.earlybird.util; - -import java.io.IOException; - -/** - * Interface class for writer. Writer should be passed in - * and have these methods. Currently keeps the hierarchy for - * completed and valid json, methods mirror the ones found in - * JsonWriter - * http://google-gson.googlecode.com/svn/trunk/gson/docs/javadocs/com/google/gson/stream/JsonWriter.html - */ -public interface ViewerWriter { - /** - * Writes a mark for the beginning of an array. - */ - ViewerWriter beginArray() throws IOException; - - /** - * Writes a mark for the beginning of an object. - */ - ViewerWriter beginObject() throws IOException; - - /** - * Writes a mark for the end of an array. - */ - ViewerWriter endArray() throws IOException; - - /** - * Writes a mark for the end of an object. - */ - ViewerWriter endObject() throws IOException; - - /** - * Writes the name (key) of a property. - */ - ViewerWriter name(String field) throws IOException; - - /** - * Writes the value of a property. - */ - ViewerWriter value(String s) throws IOException; - - /** - * Writes a new line. - */ - ViewerWriter newline() throws IOException; -} diff --git a/src/java/com/twitter/search/earlybird_root/BUILD b/src/java/com/twitter/search/earlybird_root/BUILD deleted file mode 100644 index e28e612bd..000000000 --- a/src/java/com/twitter/search/earlybird_root/BUILD +++ /dev/null @@ -1,75 +0,0 @@ -java_library( - name = "earlybird_root-lib", - sources = ["*.java"], - platform = "java8", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/inject:guice", - "3rdparty/jvm/commons-lang", - "3rdparty/jvm/org/slf4j:slf4j-api", - "decider/src/main/scala", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization/server", - "finagle/finagle-memcached/src/main/java", - "finagle/finagle-mux/src/main/scala", - "finagle/finagle-thrift/src/main/java", - "finagle/finagle-thrift/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "finatra/inject/inject-server/src/main/scala/com/twitter/inject/server", - "src/java/com/google/common/util/concurrent", - "src/java/com/twitter/common/collections", - "src/java/com/twitter/common_internal/text/version", - "src/java/com/twitter/search/common/caching", - "src/java/com/twitter/search/common/clientstats", - "src/java/com/twitter/search/common/config", - "src/java/com/twitter/search/common/dark", - "src/java/com/twitter/search/common/decider", - "src/java/com/twitter/search/common/metrics", - "src/java/com/twitter/search/common/partitioning/base", - "src/java/com/twitter/search/common/partitioning/zookeeper", - "src/java/com/twitter/search/common/relevance:ranking", - "src/java/com/twitter/search/common/root", - "src/java/com/twitter/search/common/runtime", - "src/java/com/twitter/search/common/schema/earlybird", - "src/java/com/twitter/search/common/search", - "src/java/com/twitter/search/common/util/earlybird", - "src/java/com/twitter/search/common/util/io/periodic", - "src/java/com/twitter/search/common/util/zookeeper", - "src/java/com/twitter/search/earlybird/common", - "src/java/com/twitter/search/earlybird/config", - "src/java/com/twitter/search/earlybird_root/caching", - "src/java/com/twitter/search/earlybird_root/common", - "src/java/com/twitter/search/earlybird_root/filters", - "src/java/com/twitter/search/earlybird_root/mergers", - "src/java/com/twitter/search/earlybird_root/quota", - "src/java/com/twitter/search/earlybird_root/routers", - "src/java/com/twitter/search/earlybird_root/visitors", - "src/java/com/twitter/search/queryparser", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/thrift/com/twitter/search:benchmark_query-java", - "src/thrift/com/twitter/search:earlybird-java", - "stitch/stitch-core", - "strato/src/main/scala/com/twitter/strato/catalog", - "strato/src/main/scala/com/twitter/strato/client", - "thrift-web-forms", - "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/model", - ], -) - -jvm_binary( - name = "earlybird_root-binary", - basename = "earlybird_root", - # The main class is reset in the aurora files (it's a required param). - # We need to set it to something here, because hadoop_binary requires it. - main = "com.twitter.search.earlybird_root.RealtimeRootAppMain", - runtime_platform = "java11", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/org/slf4j:slf4j-log4j12", - ":earlybird_root-lib", - "src/java/com/twitter/search/common/logging:search-log4j", - # For /admin/logging. - "twitter-server/slf4j-log4j12/src/main/scala", - ], -) diff --git a/src/java/com/twitter/search/earlybird_root/BUILD.docx b/src/java/com/twitter/search/earlybird_root/BUILD.docx new file mode 100644 index 000000000..cf40b0e60 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/BUILD.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ClientBackupFilter.docx b/src/java/com/twitter/search/earlybird_root/ClientBackupFilter.docx new file mode 100644 index 000000000..33771443e Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ClientBackupFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ClientBackupFilter.java b/src/java/com/twitter/search/earlybird_root/ClientBackupFilter.java deleted file mode 100644 index e24333735..000000000 --- a/src/java/com/twitter/search/earlybird_root/ClientBackupFilter.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.finagle.client.BackupRequestFilter; -import com.twitter.finagle.service.ResponseClassifier; -import com.twitter.finagle.service.RetryBudgets; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.finagle.util.DefaultTimer; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCustomGauge; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.util.Future; -import com.twitter.util.tunable.Tunable; - -public class ClientBackupFilter extends SimpleFilter { - private static final Logger LOG = LoggerFactory.getLogger(ClientBackupFilter.class); - - private final Map> - clientBackupFilters = new ConcurrentHashMap<>(); - private final boolean sendInterupts = false; - private final String statPrefix; - private final Tunable.Mutable maxExtraLoad; - private final StatsReceiver statsReceiver; - private final SearchDecider decider; - private final String backupRequestPrecentExtraLoadDecider; - private final int minSendBackupAfterMs = 1; - - public ClientBackupFilter(String serviceName, - String statPrefix, - StatsReceiver statsReceiver, - SearchDecider decider) { - this.statPrefix = statPrefix; - this.backupRequestPrecentExtraLoadDecider = serviceName + "_backup_request_percent_extra_load"; - this.decider = decider; - this.maxExtraLoad = Tunable.mutable("backup_tunable", getMaxExtraLoadFromDecider()); - this.statsReceiver = statsReceiver; - SearchCustomGauge.export(serviceName + "_backup_request_factor", - () -> (maxExtraLoad.apply().isDefined()) ? (double) maxExtraLoad.apply().get() : -1); - } - - private double getMaxExtraLoadFromDecider() { - return ((double) decider.getAvailability(backupRequestPrecentExtraLoadDecider)) / 100 / 100; - } - - private BackupRequestFilter backupFilter(String client) { - return new BackupRequestFilter( - maxExtraLoad, - sendInterupts, - minSendBackupAfterMs, - ResponseClassifier.Default(), - RetryBudgets.newRetryBudget(), - statsReceiver.scope(statPrefix, client, "backup_filter"), - DefaultTimer.getInstance(), - client); - } - - private void updateMaxExtraLoadIfNecessary() { - double maxExtraLoadDeciderValue = getMaxExtraLoadFromDecider(); - if (maxExtraLoad.apply().isDefined() - && !maxExtraLoad.apply().get().equals(maxExtraLoadDeciderValue)) { - LOG.info("Updating maxExtraLoad from {} to {}", - maxExtraLoad.apply().get(), - maxExtraLoadDeciderValue); - maxExtraLoad.set(maxExtraLoadDeciderValue); - } - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - updateMaxExtraLoadIfNecessary(); - - String clientID = ClientIdUtil.getClientIdFromRequest(request); - BackupRequestFilter filter = - clientBackupFilters.computeIfAbsent(clientID, this::backupFilter); - - return filter - .andThen(service) - .apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ClientLatencyFilter.docx b/src/java/com/twitter/search/earlybird_root/ClientLatencyFilter.docx new file mode 100644 index 000000000..9a2fbb440 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ClientLatencyFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ClientLatencyFilter.java b/src/java/com/twitter/search/earlybird_root/ClientLatencyFilter.java deleted file mode 100644 index 0106d7a28..000000000 --- a/src/java/com/twitter/search/earlybird_root/ClientLatencyFilter.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.concurrent.ConcurrentHashMap; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.clientstats.RequestCounters; -import com.twitter.search.common.clientstats.RequestCountersEventListener; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.filters.EarlybirdSuccessfulResponseHandler; -import com.twitter.util.Future; - -public class ClientLatencyFilter extends SimpleFilter { - // _client_latency_stats_for_ is intended to measure the latency of requests to services that this - // root depends on. This can be used to measure how long a request takes in transit between when - // it leaves a root and when a root receives the response, in case this latency is significantly - // different than Earlybird measured latency. We break it down by client, so that we can tell - // which customers are being hit by this latency. - private static final String STAT_FORMAT = "%s_client_latency_stats_for_%s"; - - private final ConcurrentHashMap requestCounterForClient = - new ConcurrentHashMap<>(); - private final String prefix; - - public ClientLatencyFilter(String prefix) { - this.prefix = prefix; - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - - RequestCounters requestCounters = requestCounterForClient.computeIfAbsent( - ClientIdUtil.getClientIdFromRequest(request), client -> - new RequestCounters(String.format(STAT_FORMAT, prefix, client))); - - RequestCountersEventListener requestCountersEventListener = - new RequestCountersEventListener<>(requestCounters, Clock.SYSTEM_CLOCK, - EarlybirdSuccessfulResponseHandler.INSTANCE); - return service.apply(request).addEventListener(requestCountersEventListener); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdCacheCommonModule.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdCacheCommonModule.docx new file mode 100644 index 000000000..c3b949a66 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdCacheCommonModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdCacheCommonModule.java b/src/java/com/twitter/search/earlybird_root/EarlybirdCacheCommonModule.java deleted file mode 100644 index 79a67ac3b..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdCacheCommonModule.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Provides; - -import com.twitter.finagle.memcached.JavaClient; -import com.twitter.finagle.mtls.authentication.ServiceIdentifier; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.EarlybirdCacheSerializer; -import com.twitter.search.common.caching.SearchCacheBuilder; -import com.twitter.search.common.caching.SearchMemcacheClientConfig; -import com.twitter.search.common.caching.SearchMemcacheClientFactory; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.caching.CacheCommonUtil; -import com.twitter.search.earlybird_root.caching.CacheStats; -import com.twitter.search.earlybird_root.caching.DefaultForcedCacheMissDecider; -import com.twitter.search.earlybird_root.filters.PostCacheRequestTypeCountFilter; -import com.twitter.util.Duration; - -/** - * Provides common bindings for cache related modules. - */ -public class EarlybirdCacheCommonModule extends TwitterModule { - private static final String CACHE_VERSION = "1"; - - @Override - public void configure() { - bind(PostCacheRequestTypeCountFilter.class).in(Singleton.class); - bind(DefaultForcedCacheMissDecider.class).in(Singleton.class); - } - - @Provides - @Singleton - @Named(CacheCommonUtil.NAMED_MAX_CACHE_RESULTS) - Integer provideMaxCacheResults() { - return 100; - } - - @Provides - @Singleton - JavaClient provideMemCacheClient( - StatsReceiver statsReceiver, ServiceIdentifier serviceIdentifier) { - SearchMemcacheClientConfig config = new SearchMemcacheClientConfig(); - config.connectTimeoutMs = Duration.fromMilliseconds(100); - config.requestTimeoutMs = Duration.fromMilliseconds(100); - config.failureAccrualFailuresNumber = 150; - config.failureAccrualFailuresDurationMillis = 30000; - config.failureAccrualDuration = Duration.fromMilliseconds(60000); - - return SearchMemcacheClientFactory.createMtlsClient( - "", - "earlybird_root", - statsReceiver, - config, - serviceIdentifier - ); - } - - /** - * Create a new Earlybird cache. - * - * @param client the memcache client to use. - * @param decider the decider to use for the cache. - * @param cachePrefix the common cache prefix for the cache type. - * @param serializedKeyPrefix the common cache prefix for the cluster. - * @param cacheExpiryMillis cache entry ttl in milliseconds. - */ - static Cache createCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - String cachePrefix, - String serializedKeyPrefix, - long cacheExpiryMillis, - int cacheKeyMaxBytes, - int cacheValueMaxBytes) { - return new SearchCacheBuilder( - CACHE_VERSION, - client, - cachePrefix, - serializedKeyPrefix, - cacheExpiryMillis) - .withMaxKeyBytes(cacheKeyMaxBytes) - .withMaxValueBytes(cacheValueMaxBytes) - .withRequestTimeoutCounter(CacheStats.REQUEST_TIMEOUT_COUNTER) - .withRequestFailedCounter(CacheStats.REQUEST_FAILED_COUNTER) - .withCacheSerializer(new EarlybirdCacheSerializer()) - .withForceCacheMissDecider(decider) - .withInProcessCache() - .build(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdChainedScatterGatherService.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdChainedScatterGatherService.docx new file mode 100644 index 000000000..185f8153f Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdChainedScatterGatherService.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdChainedScatterGatherService.java b/src/java/com/twitter/search/earlybird_root/EarlybirdChainedScatterGatherService.java deleted file mode 100644 index 1c201ce09..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdChainedScatterGatherService.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.List; - -import javax.inject.Inject; - -import com.google.common.collect.Lists; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.util.Future; - -/** - * A chain of scatter gather services. - * Regular roots use ScatterGatherService directly. This class is only used by multi-tier roots. - */ -public class EarlybirdChainedScatterGatherService extends - Service>> { - - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdChainedScatterGatherService.class); - - private final List> serviceChain; - - /** - * Construct a ScatterGatherServiceChain, by loading configurations from earlybird-tiers.yml. - */ - @Inject - public EarlybirdChainedScatterGatherService( - EarlybirdServiceChainBuilder serviceChainBuilder, - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - PartitionLoggingSupport partitionLoggingSupport) { - - serviceChain = - serviceChainBuilder.buildServiceChain(scatterGatherSupport, partitionLoggingSupport); - - if (serviceChain.isEmpty()) { - LOG.error("At least one tier has to be enabled."); - throw new RuntimeException("Root does not work with all tiers disabled."); - } - } - - @Override - public Future>> apply(EarlybirdRequestContext requestContext) { - // Hit all tiers in parallel. - List> resultList = - Lists.newArrayListWithCapacity(serviceChain.size()); - for (final Service service : serviceChain) { - resultList.add(service.apply(requestContext)); - } - return Future.value(resultList); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdCommonModule.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdCommonModule.docx new file mode 100644 index 000000000..966bf4080 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdCommonModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdCommonModule.java b/src/java/com/twitter/search/earlybird_root/EarlybirdCommonModule.java deleted file mode 100644 index f6918316d..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdCommonModule.java +++ /dev/null @@ -1,170 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.annotation.Nullable; -import javax.inject.Named; -import javax.inject.Singleton; - -import scala.PartialFunction; - -import com.google.inject.Provides; - -import org.apache.thrift.protocol.TProtocolFactory; - -import com.twitter.app.Flag; -import com.twitter.app.Flaggable; -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter; -import com.twitter.finagle.service.ReqRep; -import com.twitter.finagle.service.ResponseClass; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.finagle.thrift.RichServerParam; -import com.twitter.finagle.thrift.ThriftClientRequest; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.dark.DarkProxy; -import com.twitter.search.common.dark.ResolverProxy; -import com.twitter.search.common.partitioning.zookeeper.SearchZkClient; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.RemoteClientBuilder; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.common.root.ServerSetsConfig; -import com.twitter.search.common.util.zookeeper.ZooKeeperProxy; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; -import com.twitter.search.earlybird_root.filters.PreCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.QueryLangStatFilter; - -/** - * Provides common bindings. - */ -public class EarlybirdCommonModule extends TwitterModule { - static final String NAMED_ALT_CLIENT = "alt_client"; - static final String NAMED_EXP_CLUSTER_CLIENT = "exp_cluster_client"; - - private final Flag altZkRoleFlag = createFlag( - "alt_zk_role", - "", - "The alternative ZooKeeper role", - Flaggable.ofString()); - private final Flag altZkClientEnvFlag = createFlag( - "alt_zk_client_env", - "", - "The alternative zk client environment", - Flaggable.ofString()); - private final Flag altPartitionZkPathFlag = createFlag( - "alt_partition_zk_path", - "", - "The alternative client partition zk path", - Flaggable.ofString()); - - @Override - public void configure() { - bind(InitializeFilter.class).in(Singleton.class); - bind(PreCacheRequestTypeCountFilter.class).in(Singleton.class); - - bind(Clock.class).toInstance(Clock.SYSTEM_CLOCK); - bind(QueryLangStatFilter.Config.class).toInstance(new QueryLangStatFilter.Config(100)); - } - - // Used in SearchRootModule. - @Provides - @Singleton - PartialFunction provideResponseClassifier() { - return new RootResponseClassifier(); - } - - @Provides - @Singleton - Service providesByteService( - EarlybirdService.ServiceIface svc, - DarkProxy darkProxy, - TProtocolFactory protocolFactory) { - return darkProxy.toFilter().andThen( - new EarlybirdService.Service( - svc, new RichServerParam(protocolFactory, SearchRootModule.SCROOGE_BUFFER_SIZE))); - } - - @Provides - @Singleton - @Named(SearchRootModule.NAMED_SERVICE_INTERFACE) - Class providesServiceInterface() { - return EarlybirdService.ServiceIface.class; - } - - @Provides - @Singleton - ZooKeeperProxy provideZookeeperClient() { - return SearchZkClient.getSZooKeeperClient(); - } - - @Provides - @Singleton - EarlybirdFeatureSchemaMerger provideFeatureSchemaMerger() { - return new EarlybirdFeatureSchemaMerger(); - } - - @Provides - @Singleton - @Nullable - @Named(NAMED_ALT_CLIENT) - ServerSetsConfig provideAltServerSetsConfig() { - if (!altZkRoleFlag.isDefined() || !altZkClientEnvFlag.isDefined()) { - return null; - } - - return new ServerSetsConfig(altZkRoleFlag.apply(), altZkClientEnvFlag.apply()); - } - - @Provides - @Singleton - @Nullable - @Named(NAMED_ALT_CLIENT) - PartitionConfig provideAltPartitionConfig(PartitionConfig defaultPartitionConfig) { - if (!altPartitionZkPathFlag.isDefined()) { - return null; - } - - return new PartitionConfig( - defaultPartitionConfig.getNumPartitions(), altPartitionZkPathFlag.apply()); - } - - @Provides - @Singleton - @Nullable - @Named(NAMED_ALT_CLIENT) - RootClientServiceBuilder provideAltRootClientServiceBuilder( - @Named(NAMED_ALT_CLIENT) @Nullable ServerSetsConfig serverSetsConfig, - @Named(SearchRootModule.NAMED_SERVICE_INTERFACE) Class serviceIface, - ResolverProxy resolverProxy, - RemoteClientBuilder remoteClientBuilder) { - if (serverSetsConfig == null) { - return null; - } - - return new RootClientServiceBuilder<>( - serverSetsConfig, serviceIface, resolverProxy, remoteClientBuilder); - } - - @Provides - @Singleton - @Named(NAMED_EXP_CLUSTER_CLIENT) - RootClientServiceBuilder provideExpClusterRootClientServiceBuilder( - @Named(SearchRootModule.NAMED_EXP_CLUSTER_SERVER_SETS_CONFIG) - ServerSetsConfig serverSetsConfig, - @Named(SearchRootModule.NAMED_SERVICE_INTERFACE) Class serviceIface, - ResolverProxy resolverProxy, - RemoteClientBuilder remoteClientBuilder) { - return new RootClientServiceBuilder<>( - serverSetsConfig, serviceIface, resolverProxy, remoteClientBuilder); - } - - @Provides - @Singleton - MtlsServerSessionTrackerFilter - provideMtlsServerSessionTrackerFilter(StatsReceiver statsReceiver) { - return new MtlsServerSessionTrackerFilter<>(statsReceiver); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdFullArchiveScatterGatherSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdFullArchiveScatterGatherSupport.docx new file mode 100644 index 000000000..ea72b4312 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdFullArchiveScatterGatherSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdFullArchiveScatterGatherSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdFullArchiveScatterGatherSupport.java deleted file mode 100644 index b62adf579..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdFullArchiveScatterGatherSupport.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; - -import com.twitter.search.common.partitioning.base.PartitionMappingManager; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; - -/** - * The EarlybirdServiceScatterGatherSupport implementation used to fan out requests to the earlybird - * partitions in the full archive tiers. - */ -public class EarlybirdFullArchiveScatterGatherSupport extends EarlybirdServiceScatterGatherSupport { - /** Creates a new EarlybirdFullArchiveScatterGatherSupport instance. */ - @Inject - EarlybirdFullArchiveScatterGatherSupport( - PartitionMappingManager partitionMappingManager, - EarlybirdFeatureSchemaMerger featureSchemaMerger) { - super(partitionMappingManager, EarlybirdCluster.FULL_ARCHIVE, featureSchemaMerger); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedScatterGatherSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedScatterGatherSupport.docx new file mode 100644 index 000000000..8902388ce Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedScatterGatherSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedScatterGatherSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedScatterGatherSupport.java deleted file mode 100644 index 97bcdd620..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedScatterGatherSupport.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; - -import com.twitter.search.common.partitioning.base.PartitionMappingManager; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; - -/** - * The EarlybirdServiceScatterGatherSupport implementation used to fan out requests to the earlybird - * partitions in the protected cluster. - */ -public class EarlybirdProtectedScatterGatherSupport extends EarlybirdServiceScatterGatherSupport { - /** - * Construct a EarlybirdProtectedScatterGatherSupport to do minUserFanOut, - * used only by protected. The main difference from the base class is that - * if the from user ID is not set, exception is thrown. - */ - @Inject - EarlybirdProtectedScatterGatherSupport( - PartitionMappingManager partitionMappingManager, - EarlybirdFeatureSchemaMerger featureSchemaMerger) { - super(partitionMappingManager, EarlybirdCluster.PROTECTED, featureSchemaMerger); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedValidationBehavior.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedValidationBehavior.docx new file mode 100644 index 000000000..02abe4993 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedValidationBehavior.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedValidationBehavior.java b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedValidationBehavior.java deleted file mode 100644 index ee53cfbae..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedValidationBehavior.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.search.earlybird_root; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; - -public class EarlybirdProtectedValidationBehavior extends EarlybirdServiceValidationBehavior { - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdProtectedValidationBehavior.class); - - @Override - public EarlybirdResponse getResponseIfInvalidRequest(EarlybirdRequest request) { - if (!request.isSetSearchQuery() || request.getSearchQuery() == null) { - String errorMsg = "Invalid EarlybirdRequest, no ThriftSearchQuery specified. " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - ThriftSearchQuery searchQuery = request.getSearchQuery(); - - // Make sure this request is valid for the protected tweets cluster. - if (!searchQuery.isSetFromUserIDFilter64() || searchQuery.getFromUserIDFilter64().isEmpty()) { - String errorMsg = "ThriftSearchQuery.fromUserIDFilter64 not set. " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (!searchQuery.isSetSearcherId()) { - String errorMsg = "ThriftSearchQuery.searcherId not set. " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (searchQuery.getSearcherId() < 0) { - String errorMsg = "Invalid ThriftSearchQuery.searcherId: " + searchQuery.getSearcherId() - + ". " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - return super.getResponseIfInvalidRequest(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedWarmup.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedWarmup.docx new file mode 100644 index 000000000..a085cb402 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedWarmup.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedWarmup.java b/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedWarmup.java deleted file mode 100644 index c1b022d66..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdProtectedWarmup.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.twitter.search.earlybird_root; - -import com.google.common.base.Preconditions; - -import com.twitter.common.util.Clock; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; - -public class EarlybirdProtectedWarmup extends EarlybirdWarmup { - - public EarlybirdProtectedWarmup(Clock clock, WarmupConfig config) { - super(clock, config); - } - - /** - * The protected cluster requires all queries to specify a fromUserIdFilter and a searcherId. - */ - @Override - protected EarlybirdRequest createRequest(int requestId) { - EarlybirdRequest request = super.createRequest(requestId); - - Preconditions.checkState(request.isSetSearchQuery()); - request.getSearchQuery().addToFromUserIDFilter64(requestId); - request.getSearchQuery().setSearcherId(0L); - - return request; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdQueryRewriteFilter.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdQueryRewriteFilter.docx new file mode 100644 index 000000000..371ca2da0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdQueryRewriteFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdQueryRewriteFilter.java b/src/java/com/twitter/search/earlybird_root/EarlybirdQueryRewriteFilter.java deleted file mode 100644 index 07e07cb2d..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdQueryRewriteFilter.java +++ /dev/null @@ -1,157 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.List; -import java.util.Map; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Predicate; -import com.google.common.collect.Maps; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.search.queryparser.query.Term; -import com.twitter.search.queryparser.query.annotation.Annotation; -import com.twitter.search.queryparser.rewriter.PredicateQueryNodeDropper; -import com.twitter.search.queryparser.visitors.TermExtractorVisitor; -import com.twitter.util.Future; - -/** - * Filter that rewrites the serialized query on EarlybirdRequest. - * As of now, this filter performs the following rewrites: - * - Drop ":v annotated variants based on decider, if the query has enough term nodes. - */ -public class EarlybirdQueryRewriteFilter extends - SimpleFilter { - - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdQueryRewriteFilter.class); - - private static final String DROP_PHRASE_VARIANT_FROM_QUERY_DECIDER_KEY_PATTERN = - "drop_variants_from_%s_%s_queries"; - - // only drop variants from queries with more than this number of terms. - private static final String MIN_TERM_COUNT_FOR_VARIANT_DROPPING_DECIDER_KEY_PATTERN = - "drop_variants_from_%s_%s_queries_term_count_threshold"; - - private static final SearchCounter QUERY_PARSER_FAILURE_COUNT = - SearchCounter.export("query_rewrite_filter_query_parser_failure_count"); - - // We currently add variants only to RECENCY and RELEVANCE requests, but it doesn't hurt to export - // stats for all request types. - @VisibleForTesting - static final Map DROP_VARIANTS_QUERY_COUNTS = - Maps.newEnumMap(EarlybirdRequestType.class); - static { - for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) { - DROP_VARIANTS_QUERY_COUNTS.put( - requestType, - SearchCounter.export(String.format("drop_%s_variants_query_count", - requestType.getNormalizedName()))); - } - } - - private static final Predicate DROP_VARIANTS_PREDICATE = - q -> q.hasAnnotationType(Annotation.Type.VARIANT); - - private static final PredicateQueryNodeDropper DROP_VARIANTS_VISITOR = - new PredicateQueryNodeDropper(DROP_VARIANTS_PREDICATE); - - private final SearchDecider decider; - private final String normalizedSearchRootName; - - @Inject - public EarlybirdQueryRewriteFilter( - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName) { - this.decider = decider; - this.normalizedSearchRootName = normalizedSearchRootName; - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - - Query query = requestContext.getParsedQuery(); - // If there's no serialized query, no rewrite is necessary. - if (query == null) { - return service.apply(requestContext); - } else { - try { - Query variantsRemoved = maybeRemoveVariants(requestContext, query); - - if (query == variantsRemoved) { - return service.apply(requestContext); - } else { - EarlybirdRequestContext clonedRequestContext = - EarlybirdRequestContext.copyRequestContext(requestContext, variantsRemoved); - - return service.apply(clonedRequestContext); - } - } catch (QueryParserException e) { - // It is not clear here that the QueryParserException is the client's fault, or our fault. - // At this point it is most likely not the client's since we have a legitimate parsed Query - // from the client's request, and it's the rewriting that failed. - // In this case we choose to send the query as is (without the rewrite), instead of - // failing the entire request. - QUERY_PARSER_FAILURE_COUNT.increment(); - LOG.warn("Failed to rewrite serialized query: " + query.serialize(), e); - return service.apply(requestContext); - } - } - } - - private Query maybeRemoveVariants(EarlybirdRequestContext requestContext, Query query) - throws QueryParserException { - - if (shouldDropVariants(requestContext, query)) { - Query rewrittenQuery = DROP_VARIANTS_VISITOR.apply(query); - if (!query.equals(rewrittenQuery)) { - DROP_VARIANTS_QUERY_COUNTS.get(requestContext.getEarlybirdRequestType()).increment(); - return rewrittenQuery; - } - } - return query; - } - - private boolean shouldDropVariants(EarlybirdRequestContext requestContext, Query query) - throws QueryParserException { - TermExtractorVisitor termExtractorVisitor = new TermExtractorVisitor(false); - List terms = query.accept(termExtractorVisitor); - - EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType(); - - boolean shouldDropVariants = decider.isAvailable(getDropPhaseVariantDeciderKey(requestType)); - - return terms != null - && terms.size() >= decider.getAvailability( - getMinTermCountForVariantDroppingDeciderKey(requestType)) - && shouldDropVariants; - } - - private String getDropPhaseVariantDeciderKey(EarlybirdRequestType requestType) { - return String.format(DROP_PHRASE_VARIANT_FROM_QUERY_DECIDER_KEY_PATTERN, - normalizedSearchRootName, - requestType.getNormalizedName()); - } - - private String getMinTermCountForVariantDroppingDeciderKey(EarlybirdRequestType requestType) { - return String.format(MIN_TERM_COUNT_FOR_VARIANT_DROPPING_DECIDER_KEY_PATTERN, - normalizedSearchRootName, - requestType.getNormalizedName()); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeCgScatterGatherSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeCgScatterGatherSupport.docx new file mode 100644 index 000000000..f84973ebf Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeCgScatterGatherSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeCgScatterGatherSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeCgScatterGatherSupport.java deleted file mode 100644 index 1ffd2d247..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeCgScatterGatherSupport.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; - -import com.twitter.search.common.partitioning.base.PartitionMappingManager; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; - -/** - * The EarlybirdServiceScatterGatherSupport implementation used to fan out requests to the earlybird - * partitions in the realtime_cg cluster. - */ -public class EarlybirdRealtimeCgScatterGatherSupport extends EarlybirdServiceScatterGatherSupport { - /** Creates a new EarlybirdRealtimeCgScatterGatherSupport instance. */ - @Inject - EarlybirdRealtimeCgScatterGatherSupport( - PartitionMappingManager partitionMappingManager, - EarlybirdFeatureSchemaMerger featureSchemaMerger) { - super(partitionMappingManager, EarlybirdCluster.REALTIME_CG, featureSchemaMerger); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeScatterGatherSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeScatterGatherSupport.docx new file mode 100644 index 000000000..a6c5209c9 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeScatterGatherSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeScatterGatherSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeScatterGatherSupport.java deleted file mode 100644 index abe694857..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdRealtimeScatterGatherSupport.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; - -import com.twitter.search.common.partitioning.base.PartitionMappingManager; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; - -/** - * The EarlybirdServiceScatterGatherSupport implementation used to fan out requests to the earlybird - * partitions in the realtime cluster. - */ -public class EarlybirdRealtimeScatterGatherSupport extends EarlybirdServiceScatterGatherSupport { - /** Creates a new EarlybirdRealtimeScatterGatherSupport instance. */ - @Inject - EarlybirdRealtimeScatterGatherSupport( - PartitionMappingManager partitionMappingManager, - EarlybirdFeatureSchemaMerger featureSchemaMerger) { - super(partitionMappingManager, EarlybirdCluster.REALTIME, featureSchemaMerger); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdRootQueryUtils.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdRootQueryUtils.docx new file mode 100644 index 000000000..cc620830c Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdRootQueryUtils.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdRootQueryUtils.java b/src/java/com/twitter/search/earlybird_root/EarlybirdRootQueryUtils.java deleted file mode 100644 index 979885d09..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdRootQueryUtils.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Map; - -import com.google.common.collect.Maps; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.partitioning.base.PartitionMappingManager; -import com.twitter.search.earlybird_root.visitors.MultiTermDisjunctionPerPartitionVisitor; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; - -public final class EarlybirdRootQueryUtils { - - private static final Logger LOG = LoggerFactory.getLogger(EarlybirdRootQueryUtils.class); - - private EarlybirdRootQueryUtils() { - } - - /** - * Rewrite 'multi_term_disjunction from_user_id' or 'multi_term_disjunction id' based on partition - * for USER_ID/TWEET_ID partitioned cluster - * @return a map with partition id as key and rewritten query as value. - * If there is no 'multi_term_disjunction from_user_id/id' in query, the map will be empty; if all - * ids are truncated for a partition, it will add a NO_MATCH_CONJUNCTION here. - */ - public static Map rewriteMultiTermDisjunctionPerPartitionFilter( - Query query, PartitionMappingManager partitionMappingManager, int numPartitions) { - Map m = Maps.newHashMap(); - // If there is no parsed query, just return - if (query == null) { - return m; - } - for (int i = 0; i < numPartitions; ++i) { - MultiTermDisjunctionPerPartitionVisitor visitor = - new MultiTermDisjunctionPerPartitionVisitor(partitionMappingManager, i); - try { - Query q = query.accept(visitor); - if (q != null && q != query) { - m.put(i, q); - } - } catch (QueryParserException e) { - // Should not happen, put and log error here just in case - m.put(i, query); - LOG.error( - "MultiTermDisjuctionPerPartitionVisitor cannot process query: " + query.serialize()); - } - } - return m; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceChainBuilder.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceChainBuilder.docx new file mode 100644 index 000000000..fac0d7c21 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceChainBuilder.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceChainBuilder.java b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceChainBuilder.java deleted file mode 100644 index 9790de9fb..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceChainBuilder.java +++ /dev/null @@ -1,278 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.SortedSet; -import java.util.TreeSet; - -import javax.inject.Inject; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.RequestSuccessStats; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.root.ScatterGatherService; -import com.twitter.search.common.root.ScatterGatherSupport; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.config.TierConfig; -import com.twitter.search.earlybird.config.TierInfo; -import com.twitter.search.earlybird.config.TierInfoSource; -import com.twitter.search.earlybird.config.TierInfoUtil; -import com.twitter.search.earlybird.config.TierInfoWrapper; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.EarlybirdService.ServiceIface; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.EarlybirdTimeRangeFilter; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; -import com.twitter.util.Function; -import com.twitter.util.Future; - -@Singleton -public class EarlybirdServiceChainBuilder { - private static final Logger LOG = LoggerFactory.getLogger(EarlybirdServiceChainBuilder.class); - - private static final String SEARCH_METHOD_NAME = "search"; - - private static final EarlybirdResponse TIER_SKIPPED_RESPONSE = - new EarlybirdResponse(EarlybirdResponseCode.TIER_SKIPPED, 0) - .setSearchResults(new ThriftSearchResults()) - .setDebugString("Request to cluster dropped by decider, or sent as dark read."); - - private final EarlybirdTierThrottleDeciders tierThrottleDeciders; - - private final RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter; - - private final SearchDecider decider; - private final String normalizedSearchRootName; - private final RootClientServiceBuilder clientServiceBuilder; - private final String partitionPath; - private final int numPartitions; - private final SortedSet tierInfos; - private final PartitionAccessController partitionAccessController; - private final StatsReceiver statsReceiver; - - /** - * Construct a ScatterGatherServiceChain, by loading configurations from earlybird-tiers.yml. - */ - @Inject - public EarlybirdServiceChainBuilder( - PartitionConfig partitionConfig, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - EarlybirdTierThrottleDeciders tierThrottleDeciders, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName, - SearchDecider decider, - TierInfoSource tierConfig, - RootClientServiceBuilder clientServiceBuilder, - PartitionAccessController partitionAccessController, - StatsReceiver statsReceiver) { - this.partitionAccessController = partitionAccessController; - this.tierThrottleDeciders = Preconditions.checkNotNull(tierThrottleDeciders); - this.requestContextToEarlybirdRequestFilter = requestContextToEarlybirdRequestFilter; - this.normalizedSearchRootName = normalizedSearchRootName; - this.decider = decider; - this.statsReceiver = statsReceiver; - - List tierInformation = tierConfig.getTierInformation(); - if (tierInformation == null || tierInformation.isEmpty()) { - LOG.error( - "No tier found in config file {} Did you set SEARCH_ENV correctly?", - tierConfig.getConfigFileType()); - throw new RuntimeException("No tier found in tier config file."); - } - - // Get the tier info from the tier config yml file - TreeSet infos = new TreeSet<>(TierInfoUtil.TIER_COMPARATOR); - infos.addAll(tierInformation); - this.tierInfos = Collections.unmodifiableSortedSet(infos); - this.clientServiceBuilder = clientServiceBuilder; - this.partitionPath = partitionConfig.getPartitionPath(); - this.numPartitions = partitionConfig.getNumPartitions(); - - LOG.info("Found the following tiers from config: {}", tierInfos); - } - - /** Builds the chain of services that should be queried on each request. */ - public List> buildServiceChain( - ScatterGatherSupport support, - PartitionLoggingSupport partitionLoggingSupport) { - // Make sure the tier serving ranges do not overlap and do not have gaps. - TierInfoUtil.checkTierServingRanges(tierInfos); - - List> chain = Lists.newArrayList(); - - for (TierInfo tierInfo : tierInfos) { - String tierName = tierInfo.getTierName(); - if (tierInfo.isEnabled()) { - String rewrittenPartitionPath = partitionPath; - // This rewriting rule must match the rewriting rule inside - // EarlybirdServer#joinServerSet(). - if (!TierConfig.DEFAULT_TIER_NAME.equals(tierName)) { - rewrittenPartitionPath = partitionPath + "/" + tierName; - } - - clientServiceBuilder.initializeWithPathSuffix( - tierInfo.getTierName(), - numPartitions, - rewrittenPartitionPath); - - try { - chain.add(createTierService( - support, tierInfo, clientServiceBuilder, partitionLoggingSupport)); - } catch (Exception e) { - LOG.error("Failed to build clients for tier: {}", tierInfo.getTierName()); - throw new RuntimeException(e); - } - - } else { - LOG.info("Skipped disabled tier: {}", tierName); - } - } - - return chain; - } - - private Service createTierService( - ScatterGatherSupport support, - final TierInfo tierInfo, - RootClientServiceBuilder builder, - PartitionLoggingSupport partitionLoggingSupport) { - - final String tierName = tierInfo.getTierName(); - RequestSuccessStats stats = new RequestSuccessStats(tierName); - - List> services = - builder.safeBuildServiceList(SEARCH_METHOD_NAME); - - // Get the client list for this tier, and apply the degradationTrackerFilter to each response. - // - // We currently do this only for the EarlybirdSearchMultiTierAdaptor (the full archive cluster). - // If we want to do this for all clusters (or if we want to apply any other filter to all - // earlybird responses, for other clusters), we should change ScatterGatherService's constructor - // to take in a filter, and apply it there. - ClientBackupFilter backupFilter = new ClientBackupFilter( - "root_" + EarlybirdCluster.FULL_ARCHIVE.getNameForStats(), - tierName, - statsReceiver, - decider); - List> clients = Lists.newArrayList(); - ClientLatencyFilter latencyFilter = new ClientLatencyFilter(tierName); - for (Service client : services) { - clients.add(requestContextToEarlybirdRequestFilter - .andThen(backupFilter) - .andThen(latencyFilter) - .andThen(client)); - } - - clients = SkipPartitionFilter.wrapServices(tierName, clients, partitionAccessController); - - // Build the scatter gather service for this tier. - // Each tier has their own stats. - ScatterGatherService scatterGatherService = - new ScatterGatherService<>( - support, clients, stats, partitionLoggingSupport); - - SimpleFilter tierThrottleFilter = - getTierThrottleFilter(tierInfo, tierName); - - EarlybirdTimeRangeFilter timeRangeFilter = - EarlybirdTimeRangeFilter.newTimeRangeFilterWithQueryRewriter( - (requestContext, userOverride) -> new TierInfoWrapper(tierInfo, userOverride), - decider); - - return tierThrottleFilter - .andThen(timeRangeFilter) - .andThen(scatterGatherService); - } - - private SimpleFilter getTierThrottleFilter( - final TierInfo tierInfo, - final String tierName) { - - // A filter that throttles request rate. - final String tierThrottleDeciderKey = tierThrottleDeciders.getTierThrottleDeciderKey( - normalizedSearchRootName, tierName); - - SimpleFilter tierThrottleFilter = - new SimpleFilter() { - private final Map readCounts = - getReadCountsMap(); - - private Map getReadCountsMap() { - Map readCountsMap = - Maps.newEnumMap(TierInfo.RequestReadType.class); - for (TierInfo.RequestReadType readType : TierInfo.RequestReadType.values()) { - readCountsMap.put(readType, - SearchCounter.export("earlybird_tier_" + tierName + "_" - + readType.name().toLowerCase() + "_read_count")); - } - return Collections.unmodifiableMap(readCountsMap); - } - - private final SearchCounter tierRequestDroppedByDeciderCount = - SearchCounter.export("earlybird_tier_" + tierName - + "_request_dropped_by_decider_count"); - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - - // a blank response is returned when a request is dropped by decider, or - // a request is sent as a dark read. - final Future blankTierResponse = Future.value(TIER_SKIPPED_RESPONSE); - if (tierThrottleDeciders.shouldSendRequestToTier(tierThrottleDeciderKey)) { - TierInfoWrapper tierInfoWrapper = - new TierInfoWrapper(tierInfo, requestContext.useOverrideTierConfig()); - - TierInfo.RequestReadType readType = tierInfoWrapper.getReadType(); - readCounts.get(readType).increment(); - switch (readType) { - case DARK: - // dark read: call backend but do not wait for results - service.apply(requestContext); - return blankTierResponse; - case GREY: - // grey read: call backend, wait for results, but discard results. - return service.apply(requestContext).flatMap( - new Function>() { - @Override - public Future apply(EarlybirdResponse v1) { - // No matter what's returned, always return blankTierResponse. - return blankTierResponse; - } - }); - case LIGHT: - // light read: return the future from the backend service. - return service.apply(requestContext); - default: - throw new RuntimeException("Unknown read type: " + readType); - } - } else { - // Request is dropped by throttle decider - tierRequestDroppedByDeciderCount.increment(); - return blankTierResponse; - } - } - }; - return tierThrottleFilter; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceLoggingSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceLoggingSupport.docx new file mode 100644 index 000000000..2a577c5a8 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceLoggingSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceLoggingSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceLoggingSupport.java deleted file mode 100644 index c9b0aa776..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceLoggingSupport.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.concurrent.TimeUnit; - -import com.google.common.base.Preconditions; - -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.Timer; -import com.twitter.search.common.root.LoggingSupport; -import com.twitter.search.earlybird.common.EarlybirdRequestPostLogger; -import com.twitter.search.earlybird.common.EarlybirdRequestPreLogger; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; - -public class EarlybirdServiceLoggingSupport extends - LoggingSupport.DefaultLoggingSupport { - private static final int LATENCY_WARN_THRESHOLD_MS = 100; - - private static final Timer DUMMY_TIMER; - - private final EarlybirdRequestPreLogger requestPreLogger; - private final EarlybirdRequestPostLogger requestPostLogger; - - - static { - DUMMY_TIMER = new Timer(TimeUnit.MILLISECONDS); - DUMMY_TIMER.stop(); - } - - public EarlybirdServiceLoggingSupport(SearchDecider decider) { - requestPreLogger = EarlybirdRequestPreLogger.buildForRoot(decider.getDecider()); - requestPostLogger = EarlybirdRequestPostLogger.buildForRoot(LATENCY_WARN_THRESHOLD_MS, - decider.getDecider()); - } - - @Override - public void prelogRequest(EarlybirdRequest req) { - requestPreLogger.logRequest(req); - } - - @Override - public void postLogRequest( - EarlybirdRequest request, - EarlybirdResponse response, - long latencyNanos) { - - Preconditions.checkNotNull(request); - Preconditions.checkNotNull(response); - - response.setResponseTimeMicros(TimeUnit.NANOSECONDS.toMicros(latencyNanos)); - response.setResponseTime(TimeUnit.NANOSECONDS.toMillis(latencyNanos)); - - requestPostLogger.logRequest(request, response, DUMMY_TIMER); - } - - @Override - public void logExceptions(EarlybirdRequest req, Throwable t) { - ExceptionHandler.logException(req, t); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServicePartitionLoggingSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdServicePartitionLoggingSupport.docx new file mode 100644 index 000000000..14b7b2b28 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdServicePartitionLoggingSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServicePartitionLoggingSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdServicePartitionLoggingSupport.java deleted file mode 100644 index eb4533a31..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdServicePartitionLoggingSupport.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Map; -import java.util.Random; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class EarlybirdServicePartitionLoggingSupport - extends PartitionLoggingSupport.DefaultPartitionLoggingSupport { - private static final Logger PARTITION_LOG = LoggerFactory.getLogger("partitionLogger"); - - private static final long LATENCY_LOG_PARTITIONS_THRESHOLD_MS = 500; - private static final double FRACTION_OF_REQUESTS_TO_LOG = 1.0 / 500.0; - - private final Random random = new Random(); - - @Override - public void logPartitionLatencies(EarlybirdRequestContext requestContext, - String tierName, - Map partitionLatenciesMicros, - long latencyMs) { - String logReason = null; - - if (random.nextFloat() <= FRACTION_OF_REQUESTS_TO_LOG) { - logReason = "randomSample"; - } else if (latencyMs > LATENCY_LOG_PARTITIONS_THRESHOLD_MS) { - logReason = "slow"; - } - - EarlybirdRequest request = requestContext.getRequest(); - if (logReason != null && request.isSetSearchQuery()) { - PARTITION_LOG.info("{};{};{};{};{};{}", tierName, logReason, latencyMs, - partitionLatenciesMicros, request.getClientRequestID(), - request.getSearchQuery().getSerializedQuery()); - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceScatterGatherSupport.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceScatterGatherSupport.docx new file mode 100644 index 000000000..b7d134a16 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceScatterGatherSupport.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceScatterGatherSupport.java b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceScatterGatherSupport.java deleted file mode 100644 index 8ca892dc7..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceScatterGatherSupport.java +++ /dev/null @@ -1,202 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import javax.inject.Inject; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; - -import com.twitter.search.common.partitioning.base.PartitionDataType; -import com.twitter.search.common.partitioning.base.PartitionMappingManager; -import com.twitter.search.common.root.ScatterGatherSupport; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.mergers.EarlybirdResponseMerger; -import com.twitter.search.earlybird_root.mergers.PartitionResponseAccumulator; -import com.twitter.search.queryparser.query.Query; -import com.twitter.util.Future; - -import static com.twitter.search.earlybird_root.visitors.MultiTermDisjunctionPerPartitionVisitor.NO_MATCH_CONJUNCTION; - -public class EarlybirdServiceScatterGatherSupport - implements ScatterGatherSupport { - - private static final EarlybirdResponse EMPTY_RESPONSE = newEmptyResponse(); - - private final PartitionMappingManager partitionMappingManager; - private final EarlybirdCluster cluster; - private final EarlybirdFeatureSchemaMerger featureSchemaMerger; - - @Inject - protected EarlybirdServiceScatterGatherSupport(PartitionMappingManager partitionMappingManager, - EarlybirdCluster cluster, - EarlybirdFeatureSchemaMerger featureSchemaMerger) { - this.partitionMappingManager = partitionMappingManager; - this.cluster = cluster; - this.featureSchemaMerger = featureSchemaMerger; - } - - /** - * Fans out the original request to all partitions. - */ - private List fanoutToAllPartitions( - EarlybirdRequestContext requestContext, int numPartitions) { - // We don't need to create a deep copy of the original requestContext for every partition, - // because requests are not rewritten once they get to this level: our roots have filters - // that rewrite the requests at the top-level, but we do not rewrite requests per-partition. - List requestContexts = new ArrayList<>(numPartitions); - for (int i = 0; i < numPartitions; ++i) { - requestContexts.add(requestContext); - } - return requestContexts; - } - - private Map> populateIdsForPartition(EarlybirdRequestContext requestContext) { - Map> perPartitionIds = Maps.newHashMap(); - // Based on partition type, populate map for every partition if needed. - if (partitionMappingManager.getPartitionDataType() == PartitionDataType.USER_ID - && requestContext.getRequest().getSearchQuery().getFromUserIDFilter64Size() > 0) { - for (long userId : requestContext.getRequest().getSearchQuery().getFromUserIDFilter64()) { - int userPartition = partitionMappingManager.getPartitionIdForUserId(userId); - if (!perPartitionIds.containsKey(userPartition)) { - perPartitionIds.put(userPartition, Lists.newArrayList()); - } - perPartitionIds.get(userPartition).add(userId); - } - } else if (partitionMappingManager.getPartitionDataType() == PartitionDataType.TWEET_ID - && requestContext.getRequest().getSearchQuery().getSearchStatusIdsSize() > 0) { - for (long id : requestContext.getRequest().getSearchQuery().getSearchStatusIds()) { - int tweetPartition = partitionMappingManager.getPartitionIdForTweetId(id); - if (!perPartitionIds.containsKey(tweetPartition)) { - perPartitionIds.put(tweetPartition, Lists.newArrayList()); - } - perPartitionIds.get(tweetPartition).add(id); - } - } - return perPartitionIds; - } - - private void setPerPartitionIds(EarlybirdRequest request, List ids) { - if (partitionMappingManager.getPartitionDataType() == PartitionDataType.USER_ID) { - request.getSearchQuery().setFromUserIDFilter64(ids); - } else { - request.getSearchQuery().setSearchStatusIds(Sets.newHashSet(ids)); - } - } - - @Override - public EarlybirdResponse emptyResponse() { - return EMPTY_RESPONSE; - } - - public static final EarlybirdResponse newEmptyResponse() { - return new EarlybirdResponse(EarlybirdResponseCode.PARTITION_SKIPPED, 0) - .setSearchResults(new ThriftSearchResults()); - } - - @Override - public List rewriteRequest( - EarlybirdRequestContext requestContext, int rootNumPartitions) { - int numPartitions = partitionMappingManager.getNumPartitions(); - Preconditions.checkState(rootNumPartitions == numPartitions, - "Root's configured numPartitions is different from that configured in database.yml."); - // Rewrite query based on "multi_term_disjunction id/from_user_id" and partition id if needed. - Map perPartitionQueryMap = - requestContext.getRequest().getSearchQuery().getSearchStatusIdsSize() == 0 - ? EarlybirdRootQueryUtils.rewriteMultiTermDisjunctionPerPartitionFilter( - requestContext.getParsedQuery(), - partitionMappingManager, - numPartitions) - : Maps.newHashMap(); - - // Key: partition Id; Value: valid ids list for this partition - Map> perPartitionIds = populateIdsForPartition(requestContext); - - if (perPartitionQueryMap.isEmpty() && perPartitionIds.isEmpty()) { - return fanoutToAllPartitions(requestContext, numPartitions); - } - - List requestContexts = new ArrayList<>(numPartitions); - for (int i = 0; i < numPartitions; ++i) { - requestContexts.add(null); - } - - // Rewrite per partition queries if exist. - for (int i = 0; i < numPartitions; ++i) { - if (perPartitionIds.containsKey(i)) { - if (!perPartitionQueryMap.containsKey(i)) { - // Query does not need to be rewritten for the partition - // But we still need to create a copy, because we're gonna - // set fromUserIDFilter64/searchStatusIds - requestContexts.set(i, requestContext.deepCopy()); - setPerPartitionIds(requestContexts.get(i).getRequest(), perPartitionIds.get(i)); - } else if (perPartitionQueryMap.get(i) != NO_MATCH_CONJUNCTION) { - requestContexts.set(i, EarlybirdRequestContext.copyRequestContext( - requestContext, perPartitionQueryMap.get(i))); - setPerPartitionIds(requestContexts.get(i).getRequest(), perPartitionIds.get(i)); - } - } else if (perPartitionIds.isEmpty()) { - // The fromUserIDFilter64/searchStatusIds field is not set on the original request, - // perPartitionQueryMap should decide if we send a request to this partition or not - if (!perPartitionQueryMap.containsKey(i)) { - // Query does not need to be rewritten for the partition - // Don't need to create a copy, because request context won't be changed afterwards - requestContexts.set(i, requestContext); - } else if (perPartitionQueryMap.get(i) != NO_MATCH_CONJUNCTION) { - requestContexts.set(i, EarlybirdRequestContext.copyRequestContext( - requestContext, perPartitionQueryMap.get(i))); - } - } - } - return requestContexts; - } - - /** - * Merges all the sub-results indexed by the partition id. Sub-results with value null - * indicate an error with that partition such as timeout etc. - */ - @Override - public Future merge(EarlybirdRequestContext requestContext, - List> responses) { - EarlybirdResponseMerger merger = EarlybirdResponseMerger.getResponseMerger( - requestContext, - responses, - new PartitionResponseAccumulator(), - cluster, - featureSchemaMerger, - partitionMappingManager.getNumPartitions()); - return merger.merge(); - } - - @Override - public boolean isSuccess(EarlybirdResponse earlybirdResponse) { - return EarlybirdResponseUtil.isSuccessfulResponse(earlybirdResponse); - } - - @Override - public boolean isTimeout(EarlybirdResponse earlybirdResponse) { - return earlybirdResponse.getResponseCode() == EarlybirdResponseCode.SERVER_TIMEOUT_ERROR; - } - - @Override - public boolean isClientCancel(EarlybirdResponse earlybirdResponse) { - return earlybirdResponse.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR; - } - - @Override - public EarlybirdResponse errorResponse(String debugString) { - return new EarlybirdResponse() - .setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR) - .setDebugString(debugString); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceValidationBehavior.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceValidationBehavior.docx new file mode 100644 index 000000000..0489d4705 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceValidationBehavior.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceValidationBehavior.java b/src/java/com/twitter/search/earlybird_root/EarlybirdServiceValidationBehavior.java deleted file mode 100644 index d145b5e4d..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdServiceValidationBehavior.java +++ /dev/null @@ -1,111 +0,0 @@ -package com.twitter.search.earlybird_root; - -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.root.ValidationBehavior; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; - -public class EarlybirdServiceValidationBehavior - extends ValidationBehavior.DefaultValidationBehavior { - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdServiceValidationBehavior.class); - - private static final EarlybirdDebugInfo EARLYBIRD_DEBUG_INFO = - new EarlybirdDebugInfo().setHost("earlybird_root"); - - private static final SearchCounter INVALID_SUCCESS_RESPONSE_THRESHOLD_TOO_LOW = - SearchCounter.export("invalid_success_response_threshold_too_low"); - private static final SearchCounter INVALID_SUCCESS_RESPONSE_THRESHOLD_TOO_HIGH = - SearchCounter.export("invalid_success_response_threshold_too_high"); - - protected EarlybirdResponse createErrorResponse(String errorMsg) { - EarlybirdResponse response = new EarlybirdResponse(EarlybirdResponseCode.CLIENT_ERROR, 0); - - // We're changing some ERROR logs to WARN on our side, so we want to ensure - // that the response contains the debug information the client needs to - // resolve the problem. - response.setDebugInfo(EARLYBIRD_DEBUG_INFO); - response.setDebugString(errorMsg); - - return response; - } - - @Override - public EarlybirdResponse getResponseIfInvalidRequest(EarlybirdRequest request) { - // First, fix up the query. - EarlybirdRequestUtil.checkAndSetCollectorParams(request); - EarlybirdRequestUtil.logAndFixExcessiveValues(request); - - try { - request.validate(); - } catch (TException e) { - String errorMsg = "Invalid EarlybirdRequest. " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (request.isSetSearchSegmentId() && request.getSearchSegmentId() <= 0) { - String errorMsg = "Bad time slice ID: " + request.getSearchSegmentId(); - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (request.isSetTermStatisticsRequest() - && request.getTermStatisticsRequest().isSetHistogramSettings() - && request.getTermStatisticsRequest().getHistogramSettings().getNumBins() == 0) { - - String errorMsg = "numBins for term statistics histograms request cannot be zero: " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (!request.isSetSearchQuery() - || request.getSearchQuery() == null) { - String errorMsg = "Invalid EarlybirdRequest, no ThriftSearchQuery specified. " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - ThriftSearchQuery searchQuery = request.getSearchQuery(); - - if (!searchQuery.getCollectorParams().isSetNumResultsToReturn()) { - String errorMsg = "ThriftSearchQuery.numResultsToReturn not set. " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (searchQuery.getCollectorParams().getNumResultsToReturn() < 0) { - String errorMsg = "Invalid ThriftSearchQuery.collectorParams.numResultsToReturn: " - + searchQuery.getCollectorParams().getNumResultsToReturn() + ". " + request; - LOG.warn(errorMsg); - return createErrorResponse(errorMsg); - } - - if (request.isSetSuccessfulResponseThreshold()) { - double successfulResponseThreshold = request.getSuccessfulResponseThreshold(); - if (successfulResponseThreshold <= 0) { - String errorMsg = "Success response threshold is below or equal to 0: " - + successfulResponseThreshold + " request: " + request; - LOG.warn(errorMsg); - INVALID_SUCCESS_RESPONSE_THRESHOLD_TOO_LOW.increment(); - return createErrorResponse(errorMsg); - } else if (successfulResponseThreshold > 1) { - String errorMsg = "Success response threshold is above 1: " + successfulResponseThreshold - + " request: " + request; - LOG.warn(errorMsg); - INVALID_SUCCESS_RESPONSE_THRESHOLD_TOO_HIGH.increment(); - return createErrorResponse(errorMsg); - } - } - - return null; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdTierThrottleDeciders.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdTierThrottleDeciders.docx new file mode 100644 index 000000000..867e47d1c Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdTierThrottleDeciders.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdTierThrottleDeciders.java b/src/java/com/twitter/search/earlybird_root/EarlybirdTierThrottleDeciders.java deleted file mode 100644 index 82f382ed3..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdTierThrottleDeciders.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.decider.SearchDecider; - -/** - * Controls fractions of requests that are sent out to each tier. - */ -@Singleton -public class EarlybirdTierThrottleDeciders { - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdTierThrottleDeciders.class); - private static final String TIER_THROTTLE_DECIDER_KEY_FORMAT = - "percentage_to_hit_cluster_%s_tier_%s"; - private final SearchDecider decider; - - /** - * Construct a decider using the singleton decider object injected by Guice for the - * specified tier. - * See {@link com.twitter.search.common.root.SearchRootModule#provideDecider()} - */ - @Inject - public EarlybirdTierThrottleDeciders(SearchDecider decider) { - this.decider = decider; - } - - /** - * Return the throttle decider key for the specified tier. - */ - public String getTierThrottleDeciderKey(String clusterName, String tierName) { - String deciderKey = String.format(TIER_THROTTLE_DECIDER_KEY_FORMAT, clusterName, tierName); - if (!decider.getDecider().feature(deciderKey).exists()) { - LOG.warn("Decider key {} not found. Will always return unavailable.", deciderKey); - } - return deciderKey; - } - - /** - * Check whether a request should be sent to the specified tier. - */ - public Boolean shouldSendRequestToTier(final String tierDarkReadDeciderKey) { - return decider.isAvailable(tierDarkReadDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdWarmup.docx b/src/java/com/twitter/search/earlybird_root/EarlybirdWarmup.docx new file mode 100644 index 000000000..8e1e78a04 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/EarlybirdWarmup.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/EarlybirdWarmup.java b/src/java/com/twitter/search/earlybird_root/EarlybirdWarmup.java deleted file mode 100644 index 629f2764d..000000000 --- a/src/java/com/twitter/search/earlybird_root/EarlybirdWarmup.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.twitter.search.earlybird_root; - -import scala.runtime.AbstractFunction0; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.thrift.ClientId; -import com.twitter.search.common.caching.thriftjava.CachingParams; -import com.twitter.search.common.query.thriftjava.CollectorParams; -import com.twitter.search.common.ranking.thriftjava.ThriftRankingParams; -import com.twitter.search.common.ranking.thriftjava.ThriftScoringFunctionType; -import com.twitter.search.common.root.SearchRootWarmup; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode; -import com.twitter.search.earlybird.thrift.ThriftSearchRelevanceOptions; -import com.twitter.util.Future; - -/** - * Warm-up logic for Earlybird Roots. - * Sends 60 rounds of requests with a 1 second timeout between each round. - * The actual number of requests sent by each round can be configured. - */ -public class EarlybirdWarmup extends - SearchRootWarmup { - - private static final int WARMUP_NUM_RESULTS = 20; - - private static final String CLIENT_ID = "earlybird_root_warmup"; - - public EarlybirdWarmup(Clock clock, WarmupConfig config) { - super(clock, config); - } - - @Override - protected EarlybirdRequest createRequest(int requestId) { - String query = "(* " + "warmup" + requestId + ")"; - - return new EarlybirdRequest() - .setSearchQuery( - new ThriftSearchQuery() - .setNumResults(WARMUP_NUM_RESULTS) - .setCollectorParams( - new CollectorParams().setNumResultsToReturn(WARMUP_NUM_RESULTS)) - .setRankingMode(ThriftSearchRankingMode.RELEVANCE) - .setRelevanceOptions(new ThriftSearchRelevanceOptions() - .setRankingParams(new ThriftRankingParams() - .setType(ThriftScoringFunctionType.LINEAR))) - .setSerializedQuery(query)) - .setCachingParams(new CachingParams().setCache(false)) - .setClientId(CLIENT_ID); - } - - @Override - protected Future callService( - final EarlybirdService.ServiceIface service, - final EarlybirdRequest request) { - - return ClientId.apply(CLIENT_ID).asCurrent( - new AbstractFunction0>() { - @Override - public Future apply() { - return service.search(request); - } - }); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ExceptionHandler.docx b/src/java/com/twitter/search/earlybird_root/ExceptionHandler.docx new file mode 100644 index 000000000..58bc665ac Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ExceptionHandler.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ExceptionHandler.java b/src/java/com/twitter/search/earlybird_root/ExceptionHandler.java deleted file mode 100644 index 4176f358a..000000000 --- a/src/java/com/twitter/search/earlybird_root/ExceptionHandler.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.twitter.search.earlybird_root; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.earlybird.thrift.EarlybirdRequest; - -public final class ExceptionHandler { - private static final Logger LOG = LoggerFactory.getLogger(ExceptionHandler.class); - - private ExceptionHandler() { - } - - public static void logException(EarlybirdRequest request, Throwable e) { - LOG.error("Exception while handling request: {}", request, e); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootAppMain.docx b/src/java/com/twitter/search/earlybird_root/FullArchiveRootAppMain.docx new file mode 100644 index 000000000..00672e2ae Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/FullArchiveRootAppMain.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootAppMain.java b/src/java/com/twitter/search/earlybird_root/FullArchiveRootAppMain.java deleted file mode 100644 index a573d5deb..000000000 --- a/src/java/com/twitter/search/earlybird_root/FullArchiveRootAppMain.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Arrays; -import java.util.Collection; - -import com.google.inject.Module; - -import com.twitter.search.common.root.SearchRootAppMain; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -public class FullArchiveRootAppMain extends SearchRootAppMain { - /** - * Boilerplate for the Java-friendly AbstractTwitterServer - */ - public static class Main { - public static void main(String[] args) { - new FullArchiveRootAppMain().main(args); - } - } - - @Override - protected Collection getAdditionalModules() { - return Arrays.asList( - new EarlybirdCommonModule(), - new EarlybirdCacheCommonModule(), - new FullArchiveRootModule(), - new QuotaModule() - ); - } - - @Override - protected Class getSearchRootServerClass() { - return FullArchiveRootServer.class; - } - - @Override - protected Class getServiceIfaceClass() { - return EarlybirdService.ServiceIface.class; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootModule.docx b/src/java/com/twitter/search/earlybird_root/FullArchiveRootModule.docx new file mode 100644 index 000000000..f14db6422 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/FullArchiveRootModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootModule.java b/src/java/com/twitter/search/earlybird_root/FullArchiveRootModule.java deleted file mode 100644 index 1a128856b..000000000 --- a/src/java/com/twitter/search/earlybird_root/FullArchiveRootModule.java +++ /dev/null @@ -1,241 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.List; -import java.util.concurrent.TimeUnit; - -import javax.annotation.Nullable; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Key; -import com.google.inject.Provides; - -import com.twitter.app.Flag; -import com.twitter.app.Flaggable; -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.memcached.JavaClient; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.LoggingSupport; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.common.root.SearchRootWarmup; -import com.twitter.search.common.root.SplitterService; -import com.twitter.search.common.root.ValidationBehavior; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.config.TierInfoSource; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.caching.DefaultForcedCacheMissDecider; -import com.twitter.search.earlybird_root.caching.RecencyCache; -import com.twitter.search.earlybird_root.caching.RelevanceCache; -import com.twitter.search.earlybird_root.caching.StrictRecencyCache; -import com.twitter.search.earlybird_root.caching.TermStatsCache; -import com.twitter.search.earlybird_root.caching.TopTweetsCache; -import com.twitter.search.earlybird_root.caching.TopTweetsServicePostProcessor; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; -import com.twitter.util.Future; - -import static com.twitter.search.earlybird_root.EarlybirdCommonModule.NAMED_ALT_CLIENT; - -public class FullArchiveRootModule extends TwitterModule { - private static final String CLUSTER = "archive_full"; - private static final String ALT_TRAFFIC_PERCENTAGE_DECIDER_KEY = - "full_archive_alt_client_traffic_percentage"; - - private final Flag forceAltClientFlag = createFlag( - "force_alt_client", - false, - "Always sends traffic to the alt client", - Flaggable.ofJavaBoolean()); - - @Override - public void configure() { - bind(Key.get(EarlybirdCluster.class)).toInstance(EarlybirdCluster.FULL_ARCHIVE); - - bind(EarlybirdServiceScatterGatherSupport.class) - .to(EarlybirdFullArchiveScatterGatherSupport.class); - - bind(EarlybirdService.ServiceIface.class).to(FullArchiveRootService.class); - } - - @Provides - LoggingSupport provideLoggingSupport( - SearchDecider decider) { - return new EarlybirdServiceLoggingSupport(decider); - } - - @Provides - PartitionLoggingSupport providePartitionLoggingSupport() { - return new EarlybirdServicePartitionLoggingSupport(); - } - - @Provides - ValidationBehavior provideValidationBehavior() { - return new EarlybirdServiceValidationBehavior(); - } - - @Provides - @Singleton - @Nullable - @Named(NAMED_ALT_CLIENT) - EarlybirdServiceChainBuilder provideAltEarlybirdServiceChainBuilder( - @Named(NAMED_ALT_CLIENT) @Nullable PartitionConfig altPartitionConfig, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - EarlybirdTierThrottleDeciders tierThrottleDeciders, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName, - SearchDecider decider, - TierInfoSource tierConfig, - @Named(NAMED_ALT_CLIENT) @Nullable - RootClientServiceBuilder altRootClientServiceBuilder, - PartitionAccessController partitionAccessController, - StatsReceiver statsReceiver - ) { - if (altPartitionConfig == null || altRootClientServiceBuilder == null) { - return null; - } - - return new EarlybirdServiceChainBuilder( - altPartitionConfig, - requestContextToEarlybirdRequestFilter, - tierThrottleDeciders, - normalizedSearchRootName, - decider, - tierConfig, - altRootClientServiceBuilder, - partitionAccessController, - statsReceiver - ); - } - - @Provides - @Singleton - @Nullable - @Named(NAMED_ALT_CLIENT) - EarlybirdChainedScatterGatherService provideAltEarlybirdChainedScatterGatherService( - @Named(NAMED_ALT_CLIENT) @Nullable EarlybirdServiceChainBuilder altServiceChainBuilder, - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - PartitionLoggingSupport partitionLoggingSupport - ) { - if (altServiceChainBuilder == null) { - return null; - } - - return new EarlybirdChainedScatterGatherService( - altServiceChainBuilder, - scatterGatherSupport, - partitionLoggingSupport - ); - } - - @Provides - @Singleton - Service>> - provideEarlybirdChainedScatterGatherService( - EarlybirdChainedScatterGatherService chainedScatterGatherService, - @Named(NAMED_ALT_CLIENT) @Nullable - EarlybirdChainedScatterGatherService altChainedScatterGatherService, - SearchDecider decider - ) { - if (forceAltClientFlag.apply()) { - if (altChainedScatterGatherService == null) { - throw new RuntimeException( - "alt client cannot be null when 'force_alt_client' is set to true"); - } else { - return altChainedScatterGatherService; - } - } - - if (altChainedScatterGatherService == null) { - return chainedScatterGatherService; - } - - return new SplitterService<>( - chainedScatterGatherService, - altChainedScatterGatherService, - decider, - ALT_TRAFFIC_PERCENTAGE_DECIDER_KEY - ); - } - - @Provides - @Singleton - @RecencyCache - Cache provideRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, CLUSTER + "_recency_root", - serializedKeyPrefix, TimeUnit.HOURS.toMillis(2), cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @RelevanceCache - Cache provideRelevanceCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, CLUSTER + "_relevance_root", - serializedKeyPrefix, TimeUnit.HOURS.toMillis(2), cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @StrictRecencyCache - Cache provideStrictRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, CLUSTER + "_strict_recency_root", - serializedKeyPrefix, TimeUnit.HOURS.toMillis(2), cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @TermStatsCache - Cache provideTermStatsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, CLUSTER + "_termstats_root", - serializedKeyPrefix, TimeUnit.MINUTES.toMillis(5), cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @TopTweetsCache - Cache provideTopTweetsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, CLUSTER + "_toptweets_root", - serializedKeyPrefix, TopTweetsServicePostProcessor.CACHE_AGE_IN_MS, - cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - SearchRootWarmup providesSearchRootWarmup( - Clock clock, - WarmupConfig config) { - return new EarlybirdWarmup(clock, config); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootServer.docx b/src/java/com/twitter/search/earlybird_root/FullArchiveRootServer.docx new file mode 100644 index 000000000..a1afd8797 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/FullArchiveRootServer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootServer.java b/src/java/com/twitter/search/earlybird_root/FullArchiveRootServer.java deleted file mode 100644 index 5fc77bf12..000000000 --- a/src/java/com/twitter/search/earlybird_root/FullArchiveRootServer.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; - -import com.twitter.finagle.Service; -import com.twitter.search.common.root.SearchRootServer; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -public class FullArchiveRootServer extends SearchRootServer { - - @Inject - public FullArchiveRootServer(FullArchiveRootService svc, Service byteSvc) { - super(svc, byteSvc); - } - -} diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootService.docx b/src/java/com/twitter/search/earlybird_root/FullArchiveRootService.docx new file mode 100644 index 000000000..d487ab2ce Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/FullArchiveRootService.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/FullArchiveRootService.java b/src/java/com/twitter/search/earlybird_root/FullArchiveRootService.java deleted file mode 100644 index 977c8b974..000000000 --- a/src/java/com/twitter/search/earlybird_root/FullArchiveRootService.java +++ /dev/null @@ -1,148 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.List; -import javax.inject.Inject; -import javax.inject.Singleton; - -import com.twitter.finagle.Service; -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter; -import com.twitter.search.common.clientstats.FinagleClientStatsFilter; -import com.twitter.search.common.root.LoggingFilter; -import com.twitter.search.common.root.RequestValidationFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.EarlybirdStatusResponse; -import com.twitter.search.earlybird_root.caching.RecencyCacheFilter; -import com.twitter.search.earlybird_root.caching.RelevanceCacheFilter; -import com.twitter.search.earlybird_root.caching.RelevanceZeroResultsCacheFilter; -import com.twitter.search.earlybird_root.caching.StrictRecencyCacheFilter; -import com.twitter.search.earlybird_root.caching.TermStatsCacheFilter; -import com.twitter.search.earlybird_root.caching.TopTweetsCacheFilter; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.ClientIdQueryOperatorStatsFilter; -import com.twitter.search.earlybird_root.filters.ClientIdQuotaFilter; -import com.twitter.search.earlybird_root.filters.ClientIdTrackingFilter; -import com.twitter.search.earlybird_root.filters.ClientRequestTimeFilter; -import com.twitter.search.earlybird_root.filters.DeadlineTimeoutStatsFilter; -import com.twitter.search.earlybird_root.filters.EarlybirdFeatureSchemaAnnotateFilter; -import com.twitter.search.earlybird_root.filters.FullArchiveProtectedOperatorFilter; -import com.twitter.search.earlybird_root.filters.InitializeRequestContextFilter; -import com.twitter.search.earlybird_root.filters.IsUserProtectedMetadataTrackingFilter; -import com.twitter.search.earlybird_root.filters.MetadataTrackingFilter; -import com.twitter.search.earlybird_root.filters.NullcastTrackingFilter; -import com.twitter.search.earlybird_root.filters.PostCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.PreCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.QueryLangStatFilter; -import com.twitter.search.earlybird_root.filters.QueryOperatorStatFilter; -import com.twitter.search.earlybird_root.filters.RequestResultStatsFilter; -import com.twitter.search.earlybird_root.filters.RequestSuccessStatsFilter; -import com.twitter.search.earlybird_root.filters.ResponseCodeStatFilter; -import com.twitter.search.earlybird_root.filters.ResultTierCountFilter; -import com.twitter.search.earlybird_root.filters.SearchPayloadSizeLocalContextFilter; -import com.twitter.search.earlybird_root.filters.RejectRequestsByQuerySourceFilter; -import com.twitter.search.earlybird_root.filters.StratoAttributionClientIdFilter; -import com.twitter.search.earlybird_root.filters.TopLevelExceptionHandlingFilter; -import com.twitter.util.Future; - -@Singleton -public class FullArchiveRootService implements EarlybirdService.ServiceIface { - - private final Service allFiltersAndService; - - @Inject - public FullArchiveRootService( - TopLevelExceptionHandlingFilter topLevelExceptionHandlingFilter, - ResponseCodeStatFilter responseCodeStatFilter, - LoggingFilter loggingFilter, - RequestValidationFilter validationFilter, - MtlsServerSessionTrackerFilter mtlsFilter, - FinagleClientStatsFilter finagleStatsFilter, - InitializeFilter initializeFilter, - InitializeRequestContextFilter initializeRequestContextFilter, - QueryLangStatFilter queryLangStatFilter, - FullArchiveProtectedOperatorFilter protectedOperatorFilter, - QueryOperatorStatFilter queryOperatorStatFilter, - ClientIdQueryOperatorStatsFilter clientIdQueryOperatorStatsFilter, - IsUserProtectedMetadataTrackingFilter isUserProtectedMetadataTrackingFilter, - RequestResultStatsFilter requestResultStatsFilter, - PreCacheRequestTypeCountFilter preCacheCountFilter, - RecencyCacheFilter recencyCacheFilter, - RelevanceCacheFilter relevanceCacheFilter, - RelevanceZeroResultsCacheFilter relevanceZeroResultsCacheFilter, - StrictRecencyCacheFilter strictRecencyCacheFilter, - TermStatsCacheFilter termStatsCacheFilter, - TopTweetsCacheFilter topTweetsCacheFilter, - PostCacheRequestTypeCountFilter postCacheCountFilter, - ClientIdTrackingFilter clientIdTrackingFilter, - ClientIdQuotaFilter quotaFilter, - RejectRequestsByQuerySourceFilter rejectRequestsByQuerySourceFilter, - MetadataTrackingFilter metadataTrackingFilter, - MultiTierResultsMergeFilter multiTierResultsMergeFilter, - RequestSuccessStatsFilter requestSuccessStatsFilter, - NullcastTrackingFilter nullcastTrackingFilter, - ClientRequestTimeFilter clientRequestTimeFilter, - DeadlineTimeoutStatsFilter deadlineTimeoutStatsFilter, - EarlybirdFeatureSchemaAnnotateFilter featureSchemaAnnotateFilter, - SearchPayloadSizeLocalContextFilter searchPayloadSizeLocalContextFilter, - EarlybirdQueryRewriteFilter queryRewriteFilter, - ResultTierCountFilter resultTierCountFilter, - StratoAttributionClientIdFilter stratoAttributionClientIdFilter, - Service>> chainedScatterGatherService - ) { - - this.allFiltersAndService = - loggingFilter - .andThen(topLevelExceptionHandlingFilter) - .andThen(stratoAttributionClientIdFilter) - .andThen(clientRequestTimeFilter) - .andThen(searchPayloadSizeLocalContextFilter) - .andThen(requestSuccessStatsFilter) - .andThen(requestResultStatsFilter) - .andThen(responseCodeStatFilter) - .andThen(validationFilter) - .andThen(mtlsFilter) - .andThen(finagleStatsFilter) - .andThen(clientIdTrackingFilter) - .andThen(quotaFilter) - .andThen(rejectRequestsByQuerySourceFilter) - .andThen(metadataTrackingFilter) - .andThen(initializeFilter) - .andThen(initializeRequestContextFilter) - .andThen(deadlineTimeoutStatsFilter) - .andThen(queryLangStatFilter) - .andThen(protectedOperatorFilter) - .andThen(queryOperatorStatFilter) - .andThen(clientIdQueryOperatorStatsFilter) - .andThen(isUserProtectedMetadataTrackingFilter) - .andThen(preCacheCountFilter) - .andThen(nullcastTrackingFilter) - .andThen(recencyCacheFilter) - .andThen(relevanceCacheFilter) - .andThen(relevanceZeroResultsCacheFilter) - .andThen(strictRecencyCacheFilter) - .andThen(termStatsCacheFilter) - .andThen(topTweetsCacheFilter) - .andThen(postCacheCountFilter) - .andThen(queryRewriteFilter) - .andThen(featureSchemaAnnotateFilter) - .andThen(resultTierCountFilter) - .andThen(multiTierResultsMergeFilter) - .andThen(chainedScatterGatherService); - } - - @Override - public Future getName() { - return Future.value("fullarchive"); - } - - @Override - public Future getStatus() { - throw new UnsupportedOperationException("not supported"); - } - - @Override - public Future search(EarlybirdRequest request) { - return allFiltersAndService.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/InitializeFilter.docx b/src/java/com/twitter/search/earlybird_root/InitializeFilter.docx new file mode 100644 index 000000000..ced6b4250 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/InitializeFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/InitializeFilter.java b/src/java/com/twitter/search/earlybird_root/InitializeFilter.java deleted file mode 100644 index 5d5659965..000000000 --- a/src/java/com/twitter/search/earlybird_root/InitializeFilter.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.twitter.search.earlybird_root; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.relevance.ranking.ActionChainManager; -import com.twitter.search.common.runtime.ActionChainDebugManager; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.util.Future; -import com.twitter.util.FutureEventListener; - -/** - * Initialize request-scope state and clean them at the end. - */ -public class InitializeFilter extends SimpleFilter { - @Override - public Future apply(EarlybirdRequest request, - Service service) { - ActionChainDebugManager.update(new ActionChainManager(request.getDebugMode()), "EarlybirdRoot"); - return service.apply(request).addEventListener(new FutureEventListener() { - @Override - public void onSuccess(EarlybirdResponse response) { - cleanup(); - } - - @Override - public void onFailure(Throwable cause) { - cleanup(); - } - }); - } - - private void cleanup() { - ActionChainDebugManager.clearLocals(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/MultiTierResultsMergeFilter.docx b/src/java/com/twitter/search/earlybird_root/MultiTierResultsMergeFilter.docx new file mode 100644 index 000000000..699ce7f20 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/MultiTierResultsMergeFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/MultiTierResultsMergeFilter.java b/src/java/com/twitter/search/earlybird_root/MultiTierResultsMergeFilter.java deleted file mode 100644 index d862e5047..000000000 --- a/src/java/com/twitter/search/earlybird_root/MultiTierResultsMergeFilter.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.List; - -import javax.inject.Inject; - -import com.twitter.finagle.Filter; -import com.twitter.finagle.Service; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.mergers.EarlybirdResponseMerger; -import com.twitter.search.earlybird_root.mergers.TierResponseAccumulator; -import com.twitter.util.Function; -import com.twitter.util.Future; - -/** - * Filter used to merge results from multiple tiers - */ -public class MultiTierResultsMergeFilter extends - Filter>> { - - private final EarlybirdFeatureSchemaMerger featureSchemaMerger; - - @Inject - public MultiTierResultsMergeFilter(EarlybirdFeatureSchemaMerger featureSchemaMerger) { - this.featureSchemaMerger = featureSchemaMerger; - } - - @Override - public Future apply( - final EarlybirdRequestContext request, - Service>> service) { - return service.apply(request).flatMap(Function.func(responses -> merge(request, responses))); - } - - private Future merge( - EarlybirdRequestContext requestContext, - List> responses) { - - // For multi-tier response merging, the number of partitions do not have meaning because - // the response is not uniformly partitioned anymore. We pass Integer.MAX_VALUE for stats - // counting purpose. - EarlybirdResponseMerger merger = EarlybirdResponseMerger.getResponseMerger( - requestContext, - responses, - new TierResponseAccumulator(), - EarlybirdCluster.FULL_ARCHIVE, - featureSchemaMerger, - Integer.MAX_VALUE); - return merger.merge(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/PartitionAccessController.docx b/src/java/com/twitter/search/earlybird_root/PartitionAccessController.docx new file mode 100644 index 000000000..722025511 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/PartitionAccessController.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/PartitionAccessController.java b/src/java/com/twitter/search/earlybird_root/PartitionAccessController.java deleted file mode 100644 index bf39b39fb..000000000 --- a/src/java/com/twitter/search/earlybird_root/PartitionAccessController.java +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -/** - * Determines if a root should send requests to certain partitions based on if they have been turned - * off by decider. - */ -public class PartitionAccessController { - private final String clusterName; - private final SearchDecider decider; - - @Inject - public PartitionAccessController( - @Named(SearchRootModule.NAMED_SEARCH_ROOT_NAME) String clusterName, - @Named(SearchRootModule.NAMED_PARTITION_DECIDER) SearchDecider partitionDecider) { - this.clusterName = clusterName; - this.decider = partitionDecider; - } - - /** - * Should root send requests to a given partition - * Designed to be used to quickly stop hitting a partition of there are problems with it. - */ - public boolean canAccessPartition( - String tierName, int partitionNum, String clientId, EarlybirdRequestType requestType) { - - String partitionDeciderName = - String.format("cluster_%s_skip_tier_%s_partition_%s", clusterName, tierName, partitionNum); - if (decider.isAvailable(partitionDeciderName)) { - SearchCounter.export(partitionDeciderName).increment(); - return false; - } - - String clientDeciderName = String.format("cluster_%s_skip_tier_%s_partition_%s_client_id_%s", - clusterName, tierName, partitionNum, clientId); - if (decider.isAvailable(clientDeciderName)) { - SearchCounter.export(clientDeciderName).increment(); - return false; - } - - String requestTypeDeciderName = String.format( - "cluster_%s_skip_tier_%s_partition_%s_request_type_%s", - clusterName, tierName, partitionNum, requestType.getNormalizedName()); - if (decider.isAvailable(requestTypeDeciderName)) { - SearchCounter.export(requestTypeDeciderName).increment(); - return false; - } - - String clientRequestTypeDeciderName = String.format( - "cluster_%s_skip_tier_%s_partition_%s_client_id_%s_request_type_%s", - clusterName, tierName, partitionNum, clientId, requestType.getNormalizedName()); - if (decider.isAvailable(clientRequestTypeDeciderName)) { - SearchCounter.export(clientRequestTypeDeciderName).increment(); - return false; - } - - return true; - } - - public String getClusterName() { - return clusterName; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootAppMain.docx b/src/java/com/twitter/search/earlybird_root/ProtectedRootAppMain.docx new file mode 100644 index 000000000..48e1643c6 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ProtectedRootAppMain.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootAppMain.java b/src/java/com/twitter/search/earlybird_root/ProtectedRootAppMain.java deleted file mode 100644 index 68d155ae6..000000000 --- a/src/java/com/twitter/search/earlybird_root/ProtectedRootAppMain.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Arrays; -import java.util.Collection; - -import com.google.inject.Module; - -import com.twitter.search.common.root.SearchRootAppMain; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -public class ProtectedRootAppMain extends SearchRootAppMain { - /** - * Boilerplate for the Java-friendly AbstractTwitterServer - */ - public static class Main { - public static void main(String[] args) { - new ProtectedRootAppMain().main(args); - } - } - - @Override - protected Collection getAdditionalModules() { - return Arrays.asList( - new EarlybirdCommonModule(), - new EarlybirdCacheCommonModule(), - new ProtectedRootAppModule(), - new ProtectedScatterGatherModule()); - } - - @Override - protected Class getSearchRootServerClass() { - return ProtectedRootServer.class; - } - - @Override - protected Class getServiceIfaceClass() { - return EarlybirdService.ServiceIface.class; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootAppModule.docx b/src/java/com/twitter/search/earlybird_root/ProtectedRootAppModule.docx new file mode 100644 index 000000000..870b9af60 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ProtectedRootAppModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootAppModule.java b/src/java/com/twitter/search/earlybird_root/ProtectedRootAppModule.java deleted file mode 100644 index 09b137f7f..000000000 --- a/src/java/com/twitter/search/earlybird_root/ProtectedRootAppModule.java +++ /dev/null @@ -1,78 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Key; -import com.google.inject.Provides; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.memcached.JavaClient; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.LoggingSupport; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.common.root.SearchRootWarmup; -import com.twitter.search.common.root.ValidationBehavior; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.caching.DefaultForcedCacheMissDecider; -import com.twitter.search.earlybird_root.caching.RecencyCache; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class ProtectedRootAppModule extends TwitterModule { - @Override - public void configure() { - bind(Key.get(EarlybirdCluster.class)).toInstance(EarlybirdCluster.PROTECTED); - - bind(EarlybirdServiceScatterGatherSupport.class) - .to(EarlybirdProtectedScatterGatherSupport.class); - - bind(EarlybirdService.ServiceIface.class).to(ProtectedRootService.class); - } - - @Provides - @Singleton - LoggingSupport provideLoggingSupport( - SearchDecider decider) { - return new EarlybirdServiceLoggingSupport(decider); - } - - @Provides - @Singleton - PartitionLoggingSupport providePartitionLoggingSupport() { - return new EarlybirdServicePartitionLoggingSupport(); - } - - @Provides - @Singleton - ValidationBehavior providesValidation() { - return new EarlybirdProtectedValidationBehavior(); - } - - @Provides - @Singleton - @RecencyCache - Cache provideRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule - .createCache(client, decider, "realtime_protected_recency_root", serializedKeyPrefix, - 20000L, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - SearchRootWarmup providesSearchRootWarmup( - Clock clock, - WarmupConfig config) { - return new EarlybirdProtectedWarmup(clock, config); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootServer.docx b/src/java/com/twitter/search/earlybird_root/ProtectedRootServer.docx new file mode 100644 index 000000000..fc4c1ce45 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ProtectedRootServer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootServer.java b/src/java/com/twitter/search/earlybird_root/ProtectedRootServer.java deleted file mode 100644 index 926250641..000000000 --- a/src/java/com/twitter/search/earlybird_root/ProtectedRootServer.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; - -import com.twitter.finagle.Service; -import com.twitter.search.common.root.SearchRootServer; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -public class ProtectedRootServer extends SearchRootServer { - - @Inject - public ProtectedRootServer(ProtectedRootService svc, Service byteSvc) { - super(svc, byteSvc); - } - -} diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootService.docx b/src/java/com/twitter/search/earlybird_root/ProtectedRootService.docx new file mode 100644 index 000000000..f03bb8270 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ProtectedRootService.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedRootService.java b/src/java/com/twitter/search/earlybird_root/ProtectedRootService.java deleted file mode 100644 index 2e9b0ed0a..000000000 --- a/src/java/com/twitter/search/earlybird_root/ProtectedRootService.java +++ /dev/null @@ -1,110 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.twitter.finagle.Service; -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter; -import com.twitter.search.common.clientstats.FinagleClientStatsFilter; -import com.twitter.search.common.root.LoggingFilter; -import com.twitter.search.common.root.RequestValidationFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.EarlybirdStatusResponse; -import com.twitter.search.earlybird_root.caching.RecencyCacheFilter; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.ClientIdTrackingFilter; -import com.twitter.search.earlybird_root.filters.ClientRequestTimeFilter; -import com.twitter.search.earlybird_root.filters.DeadlineTimeoutStatsFilter; -import com.twitter.search.earlybird_root.filters.DropAllProtectedOperatorFilter; -import com.twitter.search.earlybird_root.filters.EarlybirdFeatureSchemaAnnotateFilter; -import com.twitter.search.earlybird_root.filters.InitializeRequestContextFilter; -import com.twitter.search.earlybird_root.filters.MetadataTrackingFilter; -import com.twitter.search.earlybird_root.filters.NullcastTrackingFilter; -import com.twitter.search.earlybird_root.filters.PostCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.PreCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.QueryLangStatFilter; -import com.twitter.search.earlybird_root.filters.QueryOperatorStatFilter; -import com.twitter.search.earlybird_root.filters.RequestResultStatsFilter; -import com.twitter.search.earlybird_root.filters.ResponseCodeStatFilter; -import com.twitter.search.earlybird_root.filters.SearchPayloadSizeLocalContextFilter; -import com.twitter.search.earlybird_root.filters.StratoAttributionClientIdFilter; -import com.twitter.search.earlybird_root.filters.TopLevelExceptionHandlingFilter; -import com.twitter.util.Future; - -@Singleton -public class ProtectedRootService implements EarlybirdService.ServiceIface { - - private final Service allFiltersAndService; - - @Inject - public ProtectedRootService( - LoggingFilter loggingFilter, - RequestValidationFilter validationFilter, - MtlsServerSessionTrackerFilter mtlsFilter, - FinagleClientStatsFilter finagleStatsFilter, - TopLevelExceptionHandlingFilter topLevelExceptionHandlingFilter, - ResponseCodeStatFilter responseCodeStatFilter, - InitializeFilter initializeFilter, - InitializeRequestContextFilter initializeRequestContextFilter, - QueryLangStatFilter queryLangStatFilter, - DropAllProtectedOperatorFilter dropAllProtectedOperatorFilter, - QueryOperatorStatFilter queryOperatorStatFilter, - RequestResultStatsFilter requestResultStatsFilter, - PreCacheRequestTypeCountFilter preCacheCountFilter, - RecencyCacheFilter recencyCacheFilter, - PostCacheRequestTypeCountFilter postCacheCountFilter, - ClientIdTrackingFilter clientIdTrackingFilter, - MetadataTrackingFilter metadataTrackingFilter, - NullcastTrackingFilter nullcastTrackingFilter, - ClientRequestTimeFilter clientRequestTimeFilter, - DeadlineTimeoutStatsFilter deadlineTimeoutStatsFilter, - EarlybirdFeatureSchemaAnnotateFilter featureSchemaAnnotateFilter, - SearchPayloadSizeLocalContextFilter searchPayloadSizeLocalContextFilter, - @Named(ProtectedScatterGatherModule.NAMED_SCATTER_GATHER_SERVICE) - Service scatterGatherService, - StratoAttributionClientIdFilter stratoAttributionClientIdFilter) { - allFiltersAndService = loggingFilter - .andThen(topLevelExceptionHandlingFilter) - .andThen(stratoAttributionClientIdFilter) - .andThen(clientRequestTimeFilter) - .andThen(searchPayloadSizeLocalContextFilter) - .andThen(responseCodeStatFilter) - .andThen(requestResultStatsFilter) - .andThen(validationFilter) - .andThen(mtlsFilter) - .andThen(finagleStatsFilter) - .andThen(clientIdTrackingFilter) - .andThen(metadataTrackingFilter) - .andThen(initializeFilter) - .andThen(initializeRequestContextFilter) - .andThen(deadlineTimeoutStatsFilter) - .andThen(queryLangStatFilter) - .andThen(nullcastTrackingFilter) - .andThen(dropAllProtectedOperatorFilter) - .andThen(queryOperatorStatFilter) - .andThen(preCacheCountFilter) - .andThen(recencyCacheFilter) - .andThen(postCacheCountFilter) - .andThen(featureSchemaAnnotateFilter) - .andThen(scatterGatherService); - } - - - @Override - public Future getName() { - return Future.value("protectedroot"); - } - - @Override - public Future getStatus() { - throw new UnsupportedOperationException("not supported"); - } - - @Override - public Future search(EarlybirdRequest request) { - return allFiltersAndService.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedScatterGatherModule.docx b/src/java/com/twitter/search/earlybird_root/ProtectedScatterGatherModule.docx new file mode 100644 index 000000000..511db6307 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ProtectedScatterGatherModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ProtectedScatterGatherModule.java b/src/java/com/twitter/search/earlybird_root/ProtectedScatterGatherModule.java deleted file mode 100644 index 60a02e666..000000000 --- a/src/java/com/twitter/search/earlybird_root/ProtectedScatterGatherModule.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.annotation.Nullable; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Provides; - -import com.twitter.finagle.Service; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.RequestSuccessStats; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; - -public class ProtectedScatterGatherModule extends ScatterGatherModule { - /** - * Provides the scatterGatherService for the protected cluster. - */ - @Provides - @Singleton - @Named(NAMED_SCATTER_GATHER_SERVICE) - @Override - public Service provideScatterGatherService( - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - RequestSuccessStats requestSuccessStats, - PartitionLoggingSupport partitionLoggingSupport, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - PartitionAccessController partitionAccessController, - PartitionConfig partitionConfig, - RootClientServiceBuilder rootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_EXP_CLUSTER_CLIENT) - RootClientServiceBuilder - expClusterRootClientServiceBuilder, // unused in protected roots - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable PartitionConfig altPartitionConfig, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable - RootClientServiceBuilder altRootClientServiceBuilder, - StatsReceiver statsReceiver, - EarlybirdCluster cluster, - SearchDecider decider) { - return buildScatterOrSplitterService( - scatterGatherSupport, - requestSuccessStats, - partitionLoggingSupport, - requestContextToEarlybirdRequestFilter, - partitionAccessController, - partitionConfig, - rootClientServiceBuilder, - altPartitionConfig, - altRootClientServiceBuilder, - statsReceiver, - cluster, - decider - ); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/QuotaModule.docx b/src/java/com/twitter/search/earlybird_root/QuotaModule.docx new file mode 100644 index 000000000..cbbf6714f Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/QuotaModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/QuotaModule.java b/src/java/com/twitter/search/earlybird_root/QuotaModule.java deleted file mode 100644 index d013e17d3..000000000 --- a/src/java/com/twitter/search/earlybird_root/QuotaModule.java +++ /dev/null @@ -1,110 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import javax.annotation.Nullable; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.common.util.concurrent.TwitterRateLimiterProxyFactory; -import com.google.inject.Provides; - -import com.twitter.app.Flag; -import com.twitter.app.Flaggable; -import com.twitter.common.util.Clock; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird_root.filters.ClientIdArchiveAccessFilter; -import com.twitter.search.earlybird_root.filters.ClientIdQuotaFilter; -import com.twitter.search.earlybird_root.filters.DisableClientByTierFilter; -import com.twitter.search.earlybird_root.quota.ConfigBasedQuotaConfig; -import com.twitter.search.earlybird_root.quota.ConfigRepoBasedQuotaManager; - -public class QuotaModule extends TwitterModule { - @VisibleForTesting - public static final String NAMED_QUOTA_CONFIG_PATH = "quotaConfigPath"; - public static final String NAMED_CLIENT_QUOTA_KEY = "clientQuotaKey"; - private static final String NAMED_REQUIRE_QUOTA_CONFIG_FOR_CLIENTS - = "requireQuotaConfigForClients"; - - private final Flag quotaConfigPathFlag = createMandatoryFlag( - "quota_config_path", - "", - "Path to the quota config file", - Flaggable.ofString()); - - private final Flag clientQuotaKeyFlag = createFlag( - "client_quota_key", - "quota", - "The key that will be used to extract client quotas", - Flaggable.ofString()); - - private final Flag requireQuotaConfigForClientsFlag = createFlag( - "require_quota_config_for_clients", - true, - "If true, require a quota value under for each client in the config", - Flaggable.ofJavaBoolean()); - - @Provides - @Singleton - @Named(NAMED_QUOTA_CONFIG_PATH) - String provideQuotaConfigPath() { - return quotaConfigPathFlag.apply(); - } - - @Provides - @Singleton - @Named(NAMED_CLIENT_QUOTA_KEY) - String provideClientQuotaKey() { - return clientQuotaKeyFlag.apply(); - } - - @Provides - @Singleton - @Named(NAMED_REQUIRE_QUOTA_CONFIG_FOR_CLIENTS) - boolean provideRequireQuotaConfigForClients() { - return requireQuotaConfigForClientsFlag.apply(); - } - - @Provides - @Singleton - ClientIdQuotaFilter provideConfigRepoBasedClientIdQuotaFilter( - ConfigRepoBasedQuotaManager configRepoBasedQuotaManager, - TwitterRateLimiterProxyFactory rateLimiterProxyFactory) throws Exception { - return new ClientIdQuotaFilter(configRepoBasedQuotaManager, rateLimiterProxyFactory); - } - - @Provides - @Singleton - ConfigBasedQuotaConfig providesConfigBasedQuotaConfig( - @Nullable @Named(NAMED_QUOTA_CONFIG_PATH) String quotaConfigPath, - @Nullable @Named(NAMED_CLIENT_QUOTA_KEY) String clientQuotaKey, - @Nullable @Named(NAMED_REQUIRE_QUOTA_CONFIG_FOR_CLIENTS) boolean requireQuotaConfigForClients, - Clock clock - ) throws Exception { - ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("quota-config-reloader") - .setDaemon(true) - .build()); - return ConfigBasedQuotaConfig.newConfigBasedQuotaConfig( - quotaConfigPath, clientQuotaKey, requireQuotaConfigForClients, executorService, clock); - } - - @Provides - @Singleton - DisableClientByTierFilter provideDisableClientByTierFilter( - ConfigRepoBasedQuotaManager configRepoBasedQuotaManager, - SearchDecider searchDecider) { - return new DisableClientByTierFilter(configRepoBasedQuotaManager, searchDecider); - } - - @Provides - @Singleton - ClientIdArchiveAccessFilter clientIdArchiveAccessFilter( - ConfigRepoBasedQuotaManager configRepoBasedQuotaManager) { - return new ClientIdArchiveAccessFilter(configRepoBasedQuotaManager); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/README.docx b/src/java/com/twitter/search/earlybird_root/README.docx new file mode 100644 index 000000000..14224ba2e Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/README.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/README.md b/src/java/com/twitter/search/earlybird_root/README.md deleted file mode 100644 index 750b8cdd6..000000000 --- a/src/java/com/twitter/search/earlybird_root/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Search Index (Earlybird) Root -Earlybird Roots are fanout services that fan out requests to different Earlybird clusters or partitions. - -## Architecture -![in-network](img/serving.png) - -Superroot serves as the entry point to Earlybird (Search Index) service. Request coming to superroot are first fanned out to realtime (public) and protected roots in parallel and may be fanned out to the archive root if realtime and protected clusters don't return enough results. -The realtime, protected and archive roots fanout requests to the earlybird partitions where the index is stored and served. diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppMain.docx b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppMain.docx new file mode 100644 index 000000000..59bb12b76 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppMain.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppMain.java b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppMain.java deleted file mode 100644 index 748d4556b..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppMain.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Arrays; -import java.util.Collection; - -import com.google.inject.Module; - -import com.twitter.search.common.root.SearchRootAppMain; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -public class RealtimeCgRootAppMain extends SearchRootAppMain { - /** - * Boilerplate for the Java-friendly AbstractTwitterServer - */ - public static class Main { - public static void main(String[] args) { - new RealtimeCgRootAppMain().main(args); - } - } - - @Override - protected Collection getAdditionalModules() { - return Arrays.asList( - new EarlybirdCommonModule(), - new EarlybirdCacheCommonModule(), - new RealtimeCgRootAppModule(), - new RealtimeCgScatterGatherModule(), - new QuotaModule()); - } - - @Override - protected Class getSearchRootServerClass() { - return RealtimeCgRootServer.class; - } - - @Override - protected Class getServiceIfaceClass() { - return EarlybirdService.ServiceIface.class; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppModule.docx b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppModule.docx new file mode 100644 index 000000000..7f6ee4099 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppModule.java b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppModule.java deleted file mode 100644 index 2e0cde6a2..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootAppModule.java +++ /dev/null @@ -1,152 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Key; -import com.google.inject.Provides; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.memcached.JavaClient; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.LoggingSupport; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.common.root.SearchRootWarmup; -import com.twitter.search.common.root.ValidationBehavior; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.caching.DefaultForcedCacheMissDecider; -import com.twitter.search.earlybird_root.caching.FacetsCache; -import com.twitter.search.earlybird_root.caching.RecencyCache; -import com.twitter.search.earlybird_root.caching.RelevanceCache; -import com.twitter.search.earlybird_root.caching.StrictRecencyCache; -import com.twitter.search.earlybird_root.caching.TermStatsCache; -import com.twitter.search.earlybird_root.caching.TopTweetsCache; -import com.twitter.search.earlybird_root.caching.TopTweetsServicePostProcessor; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RealtimeCgRootAppModule extends TwitterModule { - private static final long RECENCY_CACHE_TTL_MILLIS = 20000L; - private static final long RELEVANCE_CACHE_TTL_MILLIS = 20000L; - private static final long FACETS_CACHE_TTL_MILLIS = 300000L; - private static final long TERMSTATS_CACHE_TTL_MILLIS = 300000L; - - @Override - public void configure() { - bind(Key.get(EarlybirdCluster.class)).toInstance(EarlybirdCluster.REALTIME_CG); - - bind(EarlybirdServiceScatterGatherSupport.class) - .to(EarlybirdRealtimeCgScatterGatherSupport.class); - - bind(EarlybirdService.ServiceIface.class).to(RealtimeCgRootService.class); - } - - @Provides - @Singleton - @RecencyCache - Cache provideRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_cg_recency_root", - serializedKeyPrefix, RECENCY_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @RelevanceCache - Cache provideRelevanceCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_cg_relevance_root", - serializedKeyPrefix, RELEVANCE_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @StrictRecencyCache - Cache provideStrictRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache( - client, decider, "realtime_cg_strict_recency_root", serializedKeyPrefix, - RECENCY_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @FacetsCache - Cache provideFacetsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_cg_facets_root", - serializedKeyPrefix, FACETS_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @TermStatsCache - Cache provideTermStatsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_cg_termstats_root", - serializedKeyPrefix, TERMSTATS_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @TopTweetsCache - Cache provideTopTweetsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_cg_toptweets_root", - serializedKeyPrefix, TopTweetsServicePostProcessor.CACHE_AGE_IN_MS, - cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - SearchRootWarmup providesSearchRootWarmup( - Clock clock, - WarmupConfig config) { - return new EarlybirdWarmup(clock, config); - } - - @Provides - public LoggingSupport provideLoggingSupport( - SearchDecider decider) { - return new EarlybirdServiceLoggingSupport(decider); - } - - @Provides - public PartitionLoggingSupport providePartitionLoggingSupport() { - return new EarlybirdServicePartitionLoggingSupport(); - } - - @Provides - public ValidationBehavior provideValidationBehavior() { - return new EarlybirdServiceValidationBehavior(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootServer.docx b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootServer.docx new file mode 100644 index 000000000..3d854f489 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootServer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootServer.java b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootServer.java deleted file mode 100644 index 5d90da3af..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootServer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import com.twitter.finagle.Service; -import com.twitter.search.common.root.SearchRootServer; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -@Singleton -public class RealtimeCgRootServer extends SearchRootServer { - - @Inject - public RealtimeCgRootServer(RealtimeCgRootService svc, Service byteSvc) { - super(svc, byteSvc); - } - -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootService.docx b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootService.docx new file mode 100644 index 000000000..a590f9d2d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootService.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootService.java b/src/java/com/twitter/search/earlybird_root/RealtimeCgRootService.java deleted file mode 100644 index 1e8a9cb76..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeCgRootService.java +++ /dev/null @@ -1,132 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Named; -import javax.inject.Singleton; - - -import com.twitter.finagle.Service; -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter; -import com.twitter.search.common.clientstats.FinagleClientStatsFilter; -import com.twitter.search.common.root.LoggingFilter; -import com.twitter.search.common.root.RequestValidationFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.EarlybirdStatusResponse; -import com.twitter.search.earlybird_root.caching.FacetsCacheFilter; -import com.twitter.search.earlybird_root.caching.RecencyCacheFilter; -import com.twitter.search.earlybird_root.caching.RelevanceCacheFilter; -import com.twitter.search.earlybird_root.caching.RelevanceZeroResultsCacheFilter; -import com.twitter.search.earlybird_root.caching.StrictRecencyCacheFilter; -import com.twitter.search.earlybird_root.caching.TermStatsCacheFilter; -import com.twitter.search.earlybird_root.caching.TopTweetsCacheFilter; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.ClientIdQuotaFilter; -import com.twitter.search.earlybird_root.filters.ClientIdTrackingFilter; -import com.twitter.search.earlybird_root.filters.ClientRequestTimeFilter; -import com.twitter.search.earlybird_root.filters.DeadlineTimeoutStatsFilter; -import com.twitter.search.earlybird_root.filters.DropAllProtectedOperatorFilter; -import com.twitter.search.earlybird_root.filters.EarlybirdFeatureSchemaAnnotateFilter; -import com.twitter.search.earlybird_root.filters.InitializeRequestContextFilter; -import com.twitter.search.earlybird_root.filters.MetadataTrackingFilter; -import com.twitter.search.earlybird_root.filters.NullcastTrackingFilter; -import com.twitter.search.earlybird_root.filters.PostCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.PreCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.QueryLangStatFilter; -import com.twitter.search.earlybird_root.filters.QueryOperatorStatFilter; -import com.twitter.search.earlybird_root.filters.RequestResultStatsFilter; -import com.twitter.search.earlybird_root.filters.ResponseCodeStatFilter; -import com.twitter.search.earlybird_root.filters.SearchPayloadSizeLocalContextFilter; -import com.twitter.search.earlybird_root.filters.StratoAttributionClientIdFilter; -import com.twitter.search.earlybird_root.filters.TopLevelExceptionHandlingFilter; -import com.twitter.util.Future; - -@Singleton -public class RealtimeCgRootService implements EarlybirdService.ServiceIface { - - private final Service allFiltersAndService; - - @Inject - public RealtimeCgRootService( - TopLevelExceptionHandlingFilter topLevelExceptionHandlingFilter, - ResponseCodeStatFilter responseCodeStatFilter, - LoggingFilter loggingFilter, - RequestValidationFilter validationFilter, - MtlsServerSessionTrackerFilter mtlsFilter, - FinagleClientStatsFilter finagleStatsFilter, - InitializeFilter initializeFilter, - InitializeRequestContextFilter initializeRequestContextFilter, - QueryLangStatFilter queryLangStatFilter, - DropAllProtectedOperatorFilter dropAllProtectedOperatorFilter, - QueryOperatorStatFilter queryOperatorStatFilter, - RequestResultStatsFilter requestResultStatsFilter, - PreCacheRequestTypeCountFilter preCacheCountFilter, - RecencyCacheFilter recencyCacheFilter, - RelevanceCacheFilter relevanceCacheFilter, - RelevanceZeroResultsCacheFilter relevanceZeroResultsCacheFilter, - StrictRecencyCacheFilter strictRecencyCacheFilter, - FacetsCacheFilter facetsCacheFilter, - TermStatsCacheFilter termStatsCacheFilter, - TopTweetsCacheFilter topTweetsCacheFilter, - PostCacheRequestTypeCountFilter postCacheCountFilter, - ClientIdTrackingFilter clientIdTrackingFilter, - ClientIdQuotaFilter quotaFilter, - MetadataTrackingFilter metadataTrackingFilter, - NullcastTrackingFilter nullcastTrackingFilter, - ClientRequestTimeFilter clientRequestTimeFilter, - DeadlineTimeoutStatsFilter deadlineTimeoutStatsFilter, - EarlybirdFeatureSchemaAnnotateFilter featureSchemaAnnotateFilter, - SearchPayloadSizeLocalContextFilter searchPayloadSizeLocalContextFilter, - @Named(ProtectedScatterGatherModule.NAMED_SCATTER_GATHER_SERVICE) - Service scatterGatherService, - StratoAttributionClientIdFilter stratoAttributionClientIdFilter) { - this.allFiltersAndService = - loggingFilter - .andThen(topLevelExceptionHandlingFilter) - .andThen(stratoAttributionClientIdFilter) - .andThen(clientRequestTimeFilter) - .andThen(searchPayloadSizeLocalContextFilter) - .andThen(responseCodeStatFilter) - .andThen(requestResultStatsFilter) - .andThen(validationFilter) - .andThen(mtlsFilter) - .andThen(finagleStatsFilter) - .andThen(clientIdTrackingFilter) - .andThen(quotaFilter) - .andThen(metadataTrackingFilter) - .andThen(initializeFilter) - .andThen(initializeRequestContextFilter) - .andThen(deadlineTimeoutStatsFilter) - .andThen(queryLangStatFilter) - .andThen(nullcastTrackingFilter) - .andThen(dropAllProtectedOperatorFilter) - .andThen(queryOperatorStatFilter) - .andThen(preCacheCountFilter) - .andThen(recencyCacheFilter) - .andThen(relevanceCacheFilter) - .andThen(relevanceZeroResultsCacheFilter) - .andThen(strictRecencyCacheFilter) - .andThen(facetsCacheFilter) - .andThen(termStatsCacheFilter) - .andThen(topTweetsCacheFilter) - .andThen(postCacheCountFilter) - .andThen(featureSchemaAnnotateFilter) - .andThen(scatterGatherService); - } - - @Override - public Future getName() { - return Future.value("realtime_cg root"); - } - - @Override - public Future getStatus() { - throw new UnsupportedOperationException("not supported"); - } - - @Override - public Future search(EarlybirdRequest request) { - return allFiltersAndService.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgScatterGatherModule.docx b/src/java/com/twitter/search/earlybird_root/RealtimeCgScatterGatherModule.docx new file mode 100644 index 000000000..93fd27d58 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeCgScatterGatherModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeCgScatterGatherModule.java b/src/java/com/twitter/search/earlybird_root/RealtimeCgScatterGatherModule.java deleted file mode 100644 index a98189d65..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeCgScatterGatherModule.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.annotation.Nullable; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Provides; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.RequestSuccessStats; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; - -public class RealtimeCgScatterGatherModule extends ScatterGatherModule { - private static final Logger LOG = - LoggerFactory.getLogger(RealtimeCgScatterGatherModule.class); - - /** - * Provides a scatter gather service for the realtime_cg cluster. - */ - @Provides - @Singleton - @Named(NAMED_SCATTER_GATHER_SERVICE) - @Override - public Service provideScatterGatherService( - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - RequestSuccessStats requestSuccessStats, - PartitionLoggingSupport partitionLoggingSupport, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - PartitionAccessController partitionAccessController, - PartitionConfig partitionConfig, - RootClientServiceBuilder rootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_EXP_CLUSTER_CLIENT) - RootClientServiceBuilder - expClusterRootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable PartitionConfig altPartitionConfig, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable - RootClientServiceBuilder altRootClientServiceBuilder, - StatsReceiver statsReceiver, - EarlybirdCluster cluster, - SearchDecider decider) { - - - return - buildScatterOrSplitterService( - scatterGatherSupport, - requestSuccessStats, - partitionLoggingSupport, - requestContextToEarlybirdRequestFilter, - partitionAccessController, - partitionConfig, - rootClientServiceBuilder, - altPartitionConfig, - altRootClientServiceBuilder, - statsReceiver, - cluster, - decider - ); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootAppMain.docx b/src/java/com/twitter/search/earlybird_root/RealtimeRootAppMain.docx new file mode 100644 index 000000000..c10d6f8d8 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeRootAppMain.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootAppMain.java b/src/java/com/twitter/search/earlybird_root/RealtimeRootAppMain.java deleted file mode 100644 index 3fa8ccba6..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeRootAppMain.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Arrays; -import java.util.Collection; - -import com.google.inject.Module; - -import com.twitter.search.common.root.SearchRootAppMain; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -public class RealtimeRootAppMain extends SearchRootAppMain { - /** - * Boilerplate for the Java-friendly AbstractTwitterServer - */ - public static class Main { - public static void main(String[] args) { - new RealtimeRootAppMain().main(args); - } - } - - @Override - protected Collection getAdditionalModules() { - return Arrays.asList( - new EarlybirdCommonModule(), - new EarlybirdCacheCommonModule(), - new RealtimeRootAppModule(), - new RealtimeScatterGatherModule()); - } - - @Override - protected Class getSearchRootServerClass() { - return RealtimeRootServer.class; - } - - @Override - protected Class getServiceIfaceClass() { - return EarlybirdService.ServiceIface.class; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootAppModule.docx b/src/java/com/twitter/search/earlybird_root/RealtimeRootAppModule.docx new file mode 100644 index 000000000..ddc5aadae Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeRootAppModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootAppModule.java b/src/java/com/twitter/search/earlybird_root/RealtimeRootAppModule.java deleted file mode 100644 index 8e2328fb9..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeRootAppModule.java +++ /dev/null @@ -1,151 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Key; -import com.google.inject.Provides; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.memcached.JavaClient; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.LoggingSupport; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.common.root.SearchRootWarmup; -import com.twitter.search.common.root.ValidationBehavior; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.caching.DefaultForcedCacheMissDecider; -import com.twitter.search.earlybird_root.caching.FacetsCache; -import com.twitter.search.earlybird_root.caching.RecencyCache; -import com.twitter.search.earlybird_root.caching.RelevanceCache; -import com.twitter.search.earlybird_root.caching.StrictRecencyCache; -import com.twitter.search.earlybird_root.caching.TermStatsCache; -import com.twitter.search.earlybird_root.caching.TopTweetsCache; -import com.twitter.search.earlybird_root.caching.TopTweetsServicePostProcessor; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RealtimeRootAppModule extends TwitterModule { - private static final long RECENCY_CACHE_TTL_MILLIS = 20000L; - private static final long RELEVANCE_CACHE_TTL_MILLIS = 20000L; - private static final long FACETS_CACHE_TTL_MILLIS = 300000L; - private static final long TERMSTATS_CACHE_TTL_MILLIS = 300000L; - - @Override - public void configure() { - bind(Key.get(EarlybirdCluster.class)).toInstance(EarlybirdCluster.REALTIME); - - bind(EarlybirdServiceScatterGatherSupport.class) - .to(EarlybirdRealtimeScatterGatherSupport.class); - - bind(EarlybirdService.ServiceIface.class).to(RealtimeRootService.class); - } - - @Provides - @Singleton - @RecencyCache - Cache provideRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_recency_root", - serializedKeyPrefix, RECENCY_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @RelevanceCache - Cache provideRelevanceCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_relevance_root", - serializedKeyPrefix, RELEVANCE_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @StrictRecencyCache - Cache provideStrictRecencyCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_strict_recency_root", - serializedKeyPrefix, RECENCY_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @FacetsCache - Cache provideFacetsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_facets_root", - serializedKeyPrefix, FACETS_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @TermStatsCache - Cache provideTermStatsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_termstats_root", - serializedKeyPrefix, TERMSTATS_CACHE_TTL_MILLIS, cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - @Singleton - @TopTweetsCache - Cache provideTopTweetsCache( - JavaClient client, - DefaultForcedCacheMissDecider decider, - @Named(SearchRootModule.NAMED_SERIALIZED_KEY_PREFIX) String serializedKeyPrefix, - @Named(SearchRootModule.NAMED_CACHE_KEY_MAX_BYTES) int cacheKeyMaxBytes, - @Named(SearchRootModule.NAMED_CACHE_VALUE_MAX_BYTES) int cacheValueMaxBytes) { - return EarlybirdCacheCommonModule.createCache(client, decider, "realtime_toptweets_root", - serializedKeyPrefix, TopTweetsServicePostProcessor.CACHE_AGE_IN_MS, - cacheKeyMaxBytes, cacheValueMaxBytes); - } - - @Provides - SearchRootWarmup providesSearchRootWarmup( - Clock clock, - WarmupConfig config) { - return new EarlybirdWarmup(clock, config); - } - - @Provides - public LoggingSupport provideLoggingSupport( - SearchDecider decider) { - return new EarlybirdServiceLoggingSupport(decider); - } - - @Provides - public PartitionLoggingSupport providePartitionLoggingSupport() { - return new EarlybirdServicePartitionLoggingSupport(); - } - - @Provides - public ValidationBehavior provideValidationBehavior() { - return new EarlybirdServiceValidationBehavior(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootServer.docx b/src/java/com/twitter/search/earlybird_root/RealtimeRootServer.docx new file mode 100644 index 000000000..4f987ed9d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeRootServer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootServer.java b/src/java/com/twitter/search/earlybird_root/RealtimeRootServer.java deleted file mode 100644 index 2b4aed336..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeRootServer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import com.twitter.finagle.Service; -import com.twitter.search.common.root.SearchRootServer; -import com.twitter.search.earlybird.thrift.EarlybirdService; - -@Singleton -public class RealtimeRootServer extends SearchRootServer { - - @Inject - public RealtimeRootServer(RealtimeRootService svc, Service byteSvc) { - super(svc, byteSvc); - } - -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootService.docx b/src/java/com/twitter/search/earlybird_root/RealtimeRootService.docx new file mode 100644 index 000000000..ccef1058b Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeRootService.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeRootService.java b/src/java/com/twitter/search/earlybird_root/RealtimeRootService.java deleted file mode 100644 index e13379337..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeRootService.java +++ /dev/null @@ -1,129 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Named; -import javax.inject.Singleton; - - -import com.twitter.finagle.Service; -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter; -import com.twitter.search.common.clientstats.FinagleClientStatsFilter; -import com.twitter.search.common.root.LoggingFilter; -import com.twitter.search.common.root.RequestValidationFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.EarlybirdStatusResponse; -import com.twitter.search.earlybird_root.caching.FacetsCacheFilter; -import com.twitter.search.earlybird_root.caching.RecencyCacheFilter; -import com.twitter.search.earlybird_root.caching.RelevanceCacheFilter; -import com.twitter.search.earlybird_root.caching.RelevanceZeroResultsCacheFilter; -import com.twitter.search.earlybird_root.caching.StrictRecencyCacheFilter; -import com.twitter.search.earlybird_root.caching.TermStatsCacheFilter; -import com.twitter.search.earlybird_root.caching.TopTweetsCacheFilter; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.ClientIdTrackingFilter; -import com.twitter.search.earlybird_root.filters.ClientRequestTimeFilter; -import com.twitter.search.earlybird_root.filters.DeadlineTimeoutStatsFilter; -import com.twitter.search.earlybird_root.filters.DropAllProtectedOperatorFilter; -import com.twitter.search.earlybird_root.filters.EarlybirdFeatureSchemaAnnotateFilter; -import com.twitter.search.earlybird_root.filters.InitializeRequestContextFilter; -import com.twitter.search.earlybird_root.filters.MetadataTrackingFilter; -import com.twitter.search.earlybird_root.filters.NullcastTrackingFilter; -import com.twitter.search.earlybird_root.filters.PostCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.PreCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.QueryLangStatFilter; -import com.twitter.search.earlybird_root.filters.QueryOperatorStatFilter; -import com.twitter.search.earlybird_root.filters.RequestResultStatsFilter; -import com.twitter.search.earlybird_root.filters.ResponseCodeStatFilter; -import com.twitter.search.earlybird_root.filters.SearchPayloadSizeLocalContextFilter; -import com.twitter.search.earlybird_root.filters.StratoAttributionClientIdFilter; -import com.twitter.search.earlybird_root.filters.TopLevelExceptionHandlingFilter; -import com.twitter.util.Future; - -@Singleton -public class RealtimeRootService implements EarlybirdService.ServiceIface { - - private final Service allFiltersAndService; - - @Inject - public RealtimeRootService( - TopLevelExceptionHandlingFilter topLevelExceptionHandlingFilter, - ResponseCodeStatFilter responseCodeStatFilter, - LoggingFilter loggingFilter, - RequestValidationFilter validationFilter, - MtlsServerSessionTrackerFilter mtlsFilter, - FinagleClientStatsFilter finagleStatsFilter, - InitializeFilter initializeFilter, - InitializeRequestContextFilter initializeRequestContextFilter, - QueryLangStatFilter queryLangStatFilter, - DropAllProtectedOperatorFilter dropAllProtectedOperatorFilter, - QueryOperatorStatFilter queryOperatorStatFilter, - RequestResultStatsFilter requestResultStatsFilter, - PreCacheRequestTypeCountFilter preCacheCountFilter, - RecencyCacheFilter recencyCacheFilter, - RelevanceCacheFilter relevanceCacheFilter, - RelevanceZeroResultsCacheFilter relevanceZeroResultsCacheFilter, - StrictRecencyCacheFilter strictRecencyCacheFilter, - FacetsCacheFilter facetsCacheFilter, - TermStatsCacheFilter termStatsCacheFilter, - TopTweetsCacheFilter topTweetsCacheFilter, - PostCacheRequestTypeCountFilter postCacheCountFilter, - ClientIdTrackingFilter clientIdTrackingFilter, - MetadataTrackingFilter metadataTrackingFilter, - NullcastTrackingFilter nullcastTrackingFilter, - ClientRequestTimeFilter clientRequestTimeFilter, - DeadlineTimeoutStatsFilter deadlineTimeoutStatsFilter, - EarlybirdFeatureSchemaAnnotateFilter featureSchemaAnnotateFilter, - SearchPayloadSizeLocalContextFilter searchPayloadSizeLocalContextFilter, - @Named(ProtectedScatterGatherModule.NAMED_SCATTER_GATHER_SERVICE) - Service scatterGatherService, - StratoAttributionClientIdFilter stratoAttributionClientIdFilter) { - this.allFiltersAndService = - loggingFilter - .andThen(topLevelExceptionHandlingFilter) - .andThen(stratoAttributionClientIdFilter) - .andThen(clientRequestTimeFilter) - .andThen(searchPayloadSizeLocalContextFilter) - .andThen(responseCodeStatFilter) - .andThen(requestResultStatsFilter) - .andThen(validationFilter) - .andThen(mtlsFilter) - .andThen(finagleStatsFilter) - .andThen(clientIdTrackingFilter) - .andThen(metadataTrackingFilter) - .andThen(initializeFilter) - .andThen(initializeRequestContextFilter) - .andThen(deadlineTimeoutStatsFilter) - .andThen(queryLangStatFilter) - .andThen(nullcastTrackingFilter) - .andThen(dropAllProtectedOperatorFilter) - .andThen(queryOperatorStatFilter) - .andThen(preCacheCountFilter) - .andThen(recencyCacheFilter) - .andThen(relevanceCacheFilter) - .andThen(relevanceZeroResultsCacheFilter) - .andThen(strictRecencyCacheFilter) - .andThen(facetsCacheFilter) - .andThen(termStatsCacheFilter) - .andThen(topTweetsCacheFilter) - .andThen(postCacheCountFilter) - .andThen(featureSchemaAnnotateFilter) - .andThen(scatterGatherService); - } - - @Override - public Future getName() { - return Future.value("realtime root"); - } - - @Override - public Future getStatus() { - throw new UnsupportedOperationException("not supported"); - } - - @Override - public Future search(EarlybirdRequest request) { - return allFiltersAndService.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeScatterGatherModule.docx b/src/java/com/twitter/search/earlybird_root/RealtimeScatterGatherModule.docx new file mode 100644 index 000000000..1551809f3 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RealtimeScatterGatherModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RealtimeScatterGatherModule.java b/src/java/com/twitter/search/earlybird_root/RealtimeScatterGatherModule.java deleted file mode 100644 index 16463e4bc..000000000 --- a/src/java/com/twitter/search/earlybird_root/RealtimeScatterGatherModule.java +++ /dev/null @@ -1,118 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import javax.annotation.Nullable; -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Provides; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.RequestSuccessStats; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.root.ScatterGatherService; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.ExperimentCluster; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; -import com.twitter.search.earlybird_root.filters.ScatterGatherWithExperimentRedirectsService; - -public class RealtimeScatterGatherModule extends ScatterGatherModule { - private static final Logger LOG = - LoggerFactory.getLogger(RealtimeScatterGatherModule.class); - - /** - * Provides a scatter gather service for the realtime cluster that redirects to experimental - * clusters when the experiment cluster parameter is set on the EarlybirdRequest. - * - * Note: if an alternate client is specified via altPartitionConfig or - * altRootClientServiceBuilder, it will be built and used for the "control" cluster, but the - * experiment cluster takes precedence (if the experiment cluster is set on the request, the - * alternate client will never be used. - */ - @Provides - @Singleton - @Named(NAMED_SCATTER_GATHER_SERVICE) - @Override - public Service provideScatterGatherService( - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - RequestSuccessStats requestSuccessStats, - PartitionLoggingSupport partitionLoggingSupport, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - PartitionAccessController partitionAccessController, - PartitionConfig partitionConfig, - RootClientServiceBuilder rootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_EXP_CLUSTER_CLIENT) - RootClientServiceBuilder - expClusterRootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable PartitionConfig altPartitionConfig, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable - RootClientServiceBuilder altRootClientServiceBuilder, - StatsReceiver statsReceiver, - EarlybirdCluster cluster, - SearchDecider decider) { - - - Service controlService = - buildScatterOrSplitterService( - scatterGatherSupport, - requestSuccessStats, - partitionLoggingSupport, - requestContextToEarlybirdRequestFilter, - partitionAccessController, - partitionConfig, - rootClientServiceBuilder, - altPartitionConfig, - altRootClientServiceBuilder, - statsReceiver, - cluster, - decider - ); - - Map> - experimentScatterGatherServices = new HashMap<>(); - - LOG.info("Using ScatterGatherWithExperimentRedirectsService"); - LOG.info("Control Partition Path: {}", partitionConfig.getPartitionPath()); - - Arrays.stream(ExperimentCluster.values()) - .filter(v -> v.name().toLowerCase().startsWith("exp")) - .forEach(experimentCluster -> { - String expPartitionPath = partitionConfig.getPartitionPath() - + "-" + experimentCluster.name().toLowerCase(); - - LOG.info("Experiment Partition Path: {}", expPartitionPath); - - experimentScatterGatherServices.put(experimentCluster, - createScatterGatherService( - "", - scatterGatherSupport, - requestSuccessStats, - partitionLoggingSupport, - requestContextToEarlybirdRequestFilter, - partitionAccessController, - partitionConfig.getNumPartitions(), - expPartitionPath, - expClusterRootClientServiceBuilder, - statsReceiver, - cluster, - decider, - experimentCluster.name().toLowerCase())); - }); - - return new ScatterGatherWithExperimentRedirectsService( - controlService, - experimentScatterGatherServices); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/RootResponseClassifier.docx b/src/java/com/twitter/search/earlybird_root/RootResponseClassifier.docx new file mode 100644 index 000000000..0eebef1b8 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/RootResponseClassifier.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/RootResponseClassifier.java b/src/java/com/twitter/search/earlybird_root/RootResponseClassifier.java deleted file mode 100644 index f9578b648..000000000 --- a/src/java/com/twitter/search/earlybird_root/RootResponseClassifier.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.twitter.search.earlybird_root; - -import scala.PartialFunction; -import scala.runtime.AbstractPartialFunction; - -import com.twitter.finagle.service.ReqRep; -import com.twitter.finagle.service.ResponseClass; -import com.twitter.finagle.service.ResponseClasses; -import com.twitter.finagle.service.ResponseClassifier; -import com.twitter.search.common.metrics.SearchRateCounter; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.util.Try; - -public class RootResponseClassifier extends AbstractPartialFunction { - private static final PartialFunction DEFAULT_CLASSIFIER = - ResponseClassifier.Default(); - - private static final SearchRateCounter NOT_EARLYBIRD_REQUEST_COUNTER = - SearchRateCounter.export("response_classifier_not_earlybird_request"); - private static final SearchRateCounter NOT_EARLYBIRD_RESPONSE_COUNTER = - SearchRateCounter.export("response_classifier_not_earlybird_response"); - private static final SearchRateCounter NON_RETRYABLE_FAILURE_COUNTER = - SearchRateCounter.export("response_classifier_non_retryable_failure"); - private static final SearchRateCounter RETRYABLE_FAILURE_COUNTER = - SearchRateCounter.export("response_classifier_retryable_failure"); - private static final SearchRateCounter SUCCESS_COUNTER = - SearchRateCounter.export("response_classifier_success"); - - @Override - public boolean isDefinedAt(ReqRep reqRep) { - if (!(reqRep.request() instanceof EarlybirdService.search_args)) { - NOT_EARLYBIRD_REQUEST_COUNTER.increment(); - return false; - } - - if (!reqRep.response().isThrow() && (!(reqRep.response().get() instanceof EarlybirdResponse))) { - NOT_EARLYBIRD_RESPONSE_COUNTER.increment(); - return false; - } - - return true; - } - - @Override - public ResponseClass apply(ReqRep reqRep) { - Try responseTry = reqRep.response(); - if (responseTry.isThrow()) { - return DEFAULT_CLASSIFIER.apply(reqRep); - } - - // isDefinedAt() guarantees that the response is an EarlybirdResponse instance. - EarlybirdResponseCode responseCode = ((EarlybirdResponse) responseTry.get()).getResponseCode(); - switch (responseCode) { - case PARTITION_NOT_FOUND: - case PARTITION_DISABLED: - case PERSISTENT_ERROR: - NON_RETRYABLE_FAILURE_COUNTER.increment(); - return ResponseClasses.NON_RETRYABLE_FAILURE; - case TRANSIENT_ERROR: - RETRYABLE_FAILURE_COUNTER.increment(); - return ResponseClasses.RETRYABLE_FAILURE; - default: - SUCCESS_COUNTER.increment(); - return ResponseClasses.SUCCESS; - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/ScatterGatherModule.docx b/src/java/com/twitter/search/earlybird_root/ScatterGatherModule.docx new file mode 100644 index 000000000..455d95fc6 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/ScatterGatherModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/ScatterGatherModule.java b/src/java/com/twitter/search/earlybird_root/ScatterGatherModule.java deleted file mode 100644 index a0bfa07ac..000000000 --- a/src/java/com/twitter/search/earlybird_root/ScatterGatherModule.java +++ /dev/null @@ -1,167 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.ArrayList; -import java.util.List; - -import javax.annotation.Nullable; -import javax.inject.Named; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.PartitionConfig; -import com.twitter.search.common.root.PartitionLoggingSupport; -import com.twitter.search.common.root.RequestSuccessStats; -import com.twitter.search.common.root.RootClientServiceBuilder; -import com.twitter.search.common.root.ScatterGatherService; -import com.twitter.search.common.root.SplitterService; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.config.TierConfig; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; - -public abstract class ScatterGatherModule extends TwitterModule { - private static final Logger LOG = LoggerFactory.getLogger(ScatterGatherModule.class); - - private static final String SEARCH_METHOD_NAME = "search"; - protected static final String ALT_TRAFFIC_PERCENTAGE_DECIDER_KEY_PREFIX = - "alt_client_traffic_percentage_"; - static final String NAMED_SCATTER_GATHER_SERVICE = "scatter_gather_service"; - - /** - * Provides the scatterGatherService for single tier Earlybird clusters (Protected and Realtime). - */ - public abstract Service provideScatterGatherService( - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - RequestSuccessStats requestSuccessStats, - PartitionLoggingSupport partitionLoggingSupport, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - PartitionAccessController partitionAccessController, - PartitionConfig partitionConfig, - RootClientServiceBuilder rootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_EXP_CLUSTER_CLIENT) - RootClientServiceBuilder - expClusterRootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable PartitionConfig altPartitionConfig, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable - RootClientServiceBuilder altRootClientServiceBuilder, - StatsReceiver statsReceiver, - EarlybirdCluster cluster, - SearchDecider decider); - - protected final Service buildScatterOrSplitterService( - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - RequestSuccessStats requestSuccessStats, - PartitionLoggingSupport partitionLoggingSupport, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - PartitionAccessController partitionAccessController, - PartitionConfig partitionConfig, - RootClientServiceBuilder rootClientServiceBuilder, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable PartitionConfig altPartitionConfig, - @Named(EarlybirdCommonModule.NAMED_ALT_CLIENT) @Nullable - RootClientServiceBuilder altRootClientServiceBuilder, - StatsReceiver statsReceiver, - EarlybirdCluster cluster, - SearchDecider decider - ) { - ScatterGatherService scatterGatherService = - createScatterGatherService( - "", - scatterGatherSupport, - requestSuccessStats, - partitionLoggingSupport, - requestContextToEarlybirdRequestFilter, - partitionAccessController, - partitionConfig.getNumPartitions(), - partitionConfig.getPartitionPath(), - rootClientServiceBuilder, - statsReceiver, - cluster, - decider, - TierConfig.DEFAULT_TIER_NAME); - - if (altPartitionConfig == null || altRootClientServiceBuilder == null) { - LOG.info("altPartitionConfig or altRootClientServiceBuilder is not available, " - + "not using SplitterService"); - return scatterGatherService; - } - - LOG.info("alt client config available, using SplitterService"); - - ScatterGatherService altScatterGatherService = - createScatterGatherService( - "_alt", - scatterGatherSupport, - requestSuccessStats, - partitionLoggingSupport, - requestContextToEarlybirdRequestFilter, - partitionAccessController, - altPartitionConfig.getNumPartitions(), - altPartitionConfig.getPartitionPath(), - altRootClientServiceBuilder, - statsReceiver, - cluster, - decider, - TierConfig.DEFAULT_TIER_NAME); - - return new SplitterService<>( - scatterGatherService, - altScatterGatherService, - decider, - ALT_TRAFFIC_PERCENTAGE_DECIDER_KEY_PREFIX + cluster.getNameForStats()); - } - - protected ScatterGatherService - createScatterGatherService( - String nameSuffix, - EarlybirdServiceScatterGatherSupport scatterGatherSupport, - RequestSuccessStats requestSuccessStats, - PartitionLoggingSupport partitionLoggingSupport, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - PartitionAccessController partitionAccessController, - int numPartitions, - String partitionPath, - RootClientServiceBuilder rootClientServiceBuilder, - StatsReceiver statsReceiver, - EarlybirdCluster cluster, - SearchDecider decider, - String clientName) { - rootClientServiceBuilder.initializeWithPathSuffix(clientName + nameSuffix, - numPartitions, - partitionPath); - - ClientBackupFilter backupFilter = - new ClientBackupFilter( - "root_" + cluster.getNameForStats(), - "earlybird" + nameSuffix, - statsReceiver, - decider); - - ClientLatencyFilter clientLatencyFilter = new ClientLatencyFilter("all" + nameSuffix); - - List> services = new ArrayList<>(); - for (Service service - : rootClientServiceBuilder - .safeBuildServiceList(SEARCH_METHOD_NAME)) { - services.add(requestContextToEarlybirdRequestFilter - .andThen(backupFilter) - .andThen(clientLatencyFilter) - .andThen(service)); - } - services = SkipPartitionFilter.wrapServices(TierConfig.DEFAULT_TIER_NAME, services, - partitionAccessController); - - return new ScatterGatherService<>( - scatterGatherSupport, - services, - requestSuccessStats, - partitionLoggingSupport); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/SkipPartitionFilter.docx b/src/java/com/twitter/search/earlybird_root/SkipPartitionFilter.docx new file mode 100644 index 000000000..dcca887ef Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/SkipPartitionFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/SkipPartitionFilter.java b/src/java/com/twitter/search/earlybird_root/SkipPartitionFilter.java deleted file mode 100644 index 1d9feac4b..000000000 --- a/src/java/com/twitter/search/earlybird_root/SkipPartitionFilter.java +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.ArrayList; -import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; -import com.twitter.util.Future; - -/** - * Filter that returns a PARTITION_SKIPPED response instead of sending the request to a partition - * if the partition PartitionAccessController says its disabled for a request. - */ -public final class SkipPartitionFilter extends - SimpleFilter { - - private static final Logger LOG = LoggerFactory.getLogger(SkipPartitionFilter.class); - - private final String tierName; - private final int partitionNum; - private final PartitionAccessController controller; - - private SkipPartitionFilter(String tierName, int partitionNum, - PartitionAccessController controller) { - this.tierName = tierName; - this.partitionNum = partitionNum; - this.controller = controller; - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - - EarlybirdRequest request = requestContext.getRequest(); - if (!controller.canAccessPartition(tierName, partitionNum, request.getClientId(), - EarlybirdRequestType.of(request))) { - return Future.value(EarlybirdServiceScatterGatherSupport.newEmptyResponse()); - } - - return service.apply(requestContext); - } - - /** - * Wrap the services with a SkipPartitionFilter - */ - public static List> wrapServices( - String tierName, - List> clients, - PartitionAccessController controller) { - - LOG.info("Creating SkipPartitionFilters for cluster: {}, tier: {}, partitions 0-{}", - controller.getClusterName(), tierName, clients.size() - 1); - - List> wrappedServices = new ArrayList<>(); - for (int partitionNum = 0; partitionNum < clients.size(); partitionNum++) { - SkipPartitionFilter filter = new SkipPartitionFilter(tierName, partitionNum, controller); - wrappedServices.add(filter.andThen(clients.get(partitionNum))); - } - - return wrappedServices; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootAppMain.docx b/src/java/com/twitter/search/earlybird_root/SuperRootAppMain.docx new file mode 100644 index 000000000..781d2e809 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/SuperRootAppMain.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootAppMain.java b/src/java/com/twitter/search/earlybird_root/SuperRootAppMain.java deleted file mode 100644 index 26ab5e5bb..000000000 --- a/src/java/com/twitter/search/earlybird_root/SuperRootAppMain.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Arrays; -import java.util.Collection; - -import com.google.inject.Module; - -import com.twitter.search.common.root.SearchRootAppMain; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.routers.FacetsRequestRouterModule; -import com.twitter.search.earlybird_root.routers.RecencyRequestRouterModule; -import com.twitter.search.earlybird_root.routers.RelevanceRequestRouterModule; -import com.twitter.search.earlybird_root.routers.TermStatsRequestRouterModule; -import com.twitter.search.earlybird_root.routers.TopTweetsRequestRouterModule; - -public class SuperRootAppMain extends SearchRootAppMain { - /** - * Boilerplate for the Java-friendly AbstractTwitterServer - */ - public static class Main { - public static void main(String[] args) { - new SuperRootAppMain().main(args); - } - } - - @Override - protected Collection getAdditionalModules() { - return Arrays.asList( - new EarlybirdCommonModule(), - new SuperRootAppModule(), - new TermStatsRequestRouterModule(), - new RecencyRequestRouterModule(), - new RelevanceRequestRouterModule(), - new TopTweetsRequestRouterModule(), - new FacetsRequestRouterModule(), - new QuotaModule()); - } - - @Override - protected Class getSearchRootServerClass() { - return SuperRootServer.class; - } - - @Override - protected Class getServiceIfaceClass() { - return EarlybirdService.ServiceIface.class; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootAppModule.docx b/src/java/com/twitter/search/earlybird_root/SuperRootAppModule.docx new file mode 100644 index 000000000..723db20bf Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/SuperRootAppModule.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootAppModule.java b/src/java/com/twitter/search/earlybird_root/SuperRootAppModule.java deleted file mode 100644 index 8c36f3aa2..000000000 --- a/src/java/com/twitter/search/earlybird_root/SuperRootAppModule.java +++ /dev/null @@ -1,234 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Named; -import javax.inject.Singleton; - -import com.google.inject.Key; -import com.google.inject.Provides; -import com.google.inject.util.Providers; - -import com.twitter.app.Flag; -import com.twitter.app.Flaggable; -import com.twitter.common.util.Clock; -import com.twitter.common_internal.text.version.PenguinVersionConfig; -import com.twitter.finagle.Name; -import com.twitter.finagle.Service; -import com.twitter.finagle.stats.StatsReceiver; -import com.twitter.inject.TwitterModule; -import com.twitter.search.common.config.SearchPenguinVersionsConfig; -import com.twitter.search.common.dark.ResolverProxy; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.LoggingSupport; -import com.twitter.search.common.root.RemoteClientBuilder; -import com.twitter.search.common.root.SearchRootWarmup; -import com.twitter.search.common.root.ValidationBehavior; -import com.twitter.search.common.root.WarmupConfig; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.ThriftTweetSource; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.InjectionNames; -import com.twitter.search.earlybird_root.filters.EarlybirdClusterAvailableFilter; -import com.twitter.search.earlybird_root.filters.MarkTweetSourceFilter; -import com.twitter.search.earlybird_root.filters.RequestContextToEarlybirdRequestFilter; -import com.twitter.search.earlybird_root.filters.RequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.ServiceExceptionHandlingFilter; -import com.twitter.search.earlybird_root.filters.ServiceResponseValidationFilter; -import com.twitter.search.earlybird_root.filters.UnsetSuperRootFieldsFilter; -import com.twitter.util.Future; - -public class SuperRootAppModule extends TwitterModule { - private final Flag rootRealtimeFlag = createFlag( - "root-realtime", - "", - "Override the path to root-realtime", - Flaggable.ofString()); - private final Flag rootProtectedFlag = createFlag( - "root-protected", - "", - "Override the path to root-protected", - Flaggable.ofString()); - private final Flag rootArchiveFullFlag = createFlag( - "root-archive-full", - "", - "Override the path to root-archive-full", - Flaggable.ofString()); - private final Flag penguinVersionsFlag = createMandatoryFlag( - "penguin_versions", - "Penguin versions to be tokenized", - "", - Flaggable.ofString()); - - @Override - public void configure() { - // SuperRoot uses all clusters, not just one. We bind EarlybirdCluster to null to indicate that - // there is not one specific cluster to use. - bind(Key.get(EarlybirdCluster.class)).toProvider(Providers.of(null)); - - bind(EarlybirdService.ServiceIface.class).to(SuperRootService.class); - } - - @Provides - SearchRootWarmup providesSearchRootWarmup( - Clock clock, - WarmupConfig config) { - return new EarlybirdWarmup(clock, config); - } - - @Provides - @Singleton - @Named(InjectionNames.REALTIME) - private EarlybirdService.ServiceIface providesRealtimeIface( - RemoteClientBuilder builder, - ResolverProxy proxy) throws Exception { - Name name = proxy.resolve(rootRealtimeFlag.apply()); - return builder.createRemoteClient(name, "realtime", "realtime_"); - } - - @Provides - @Singleton - @Named(InjectionNames.REALTIME) - private Service providesRealtimeService( - @Named(InjectionNames.REALTIME) - EarlybirdService.ServiceIface realtimeServiceIface, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - StatsReceiver statsReceiver, - SearchDecider decider) { - return buildClientService( - realtimeServiceIface, - new EarlybirdClusterAvailableFilter(decider, EarlybirdCluster.REALTIME), - new MarkTweetSourceFilter(ThriftTweetSource.REALTIME_CLUSTER), - new ServiceExceptionHandlingFilter(EarlybirdCluster.REALTIME), - new ServiceResponseValidationFilter(EarlybirdCluster.REALTIME), - new RequestTypeCountFilter(EarlybirdCluster.REALTIME.getNameForStats()), - requestContextToEarlybirdRequestFilter, - new UnsetSuperRootFieldsFilter(), - new ClientLatencyFilter(EarlybirdCluster.REALTIME.getNameForStats())); - } - - @Provides - @Singleton - @Named(InjectionNames.FULL_ARCHIVE) - private EarlybirdService.ServiceIface providesFullArchiveIface( - RemoteClientBuilder builder, - ResolverProxy proxy) throws Exception { - Name name = proxy.resolve(rootArchiveFullFlag.apply()); - return builder.createRemoteClient(name, "fullarchive", "full_archive_"); - } - - @Provides - @Singleton - @Named(InjectionNames.FULL_ARCHIVE) - private Service providesFullArchiveService( - @Named(InjectionNames.FULL_ARCHIVE) - EarlybirdService.ServiceIface fullArchiveServiceIface, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - StatsReceiver statsReceiver, - SearchDecider decider) { - return buildClientService( - fullArchiveServiceIface, - new EarlybirdClusterAvailableFilter(decider, EarlybirdCluster.FULL_ARCHIVE), - new MarkTweetSourceFilter(ThriftTweetSource.FULL_ARCHIVE_CLUSTER), - new ServiceExceptionHandlingFilter(EarlybirdCluster.FULL_ARCHIVE), - new ServiceResponseValidationFilter(EarlybirdCluster.FULL_ARCHIVE), - new RequestTypeCountFilter(EarlybirdCluster.FULL_ARCHIVE.getNameForStats()), - requestContextToEarlybirdRequestFilter, - // Disable unset followedUserIds for archive since archive earlybirds rely on this field - // to rewrite query to include protected Tweets - new UnsetSuperRootFieldsFilter(false), - new ClientLatencyFilter(EarlybirdCluster.FULL_ARCHIVE.getNameForStats())); - } - - @Provides - @Singleton - @Named(InjectionNames.PROTECTED) - private EarlybirdService.ServiceIface providesProtectedIface( - RemoteClientBuilder builder, - ResolverProxy proxy) throws Exception { - Name name = proxy.resolve(rootProtectedFlag.apply()); - return builder.createRemoteClient(name, "protected", "protected_"); - } - - @Provides - @Singleton - @Named(InjectionNames.PROTECTED) - private Service providesProtectedService( - @Named(InjectionNames.PROTECTED) - EarlybirdService.ServiceIface protectedServiceIface, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - StatsReceiver statsReceiver, - SearchDecider decider) { - return buildClientService( - protectedServiceIface, - new EarlybirdClusterAvailableFilter(decider, EarlybirdCluster.PROTECTED), - new MarkTweetSourceFilter(ThriftTweetSource.REALTIME_PROTECTED_CLUSTER), - new ServiceExceptionHandlingFilter(EarlybirdCluster.PROTECTED), - new ServiceResponseValidationFilter(EarlybirdCluster.PROTECTED), - new RequestTypeCountFilter(EarlybirdCluster.PROTECTED.getNameForStats()), - requestContextToEarlybirdRequestFilter, - new UnsetSuperRootFieldsFilter(), - new ClientLatencyFilter(EarlybirdCluster.PROTECTED.getNameForStats())); - } - - /** - * Builds a Finagle Service based on a EarlybirdService.ServiceIface. - */ - private Service buildClientService( - final EarlybirdService.ServiceIface serviceIface, - EarlybirdClusterAvailableFilter earlybirdClusterAvailableFilter, - MarkTweetSourceFilter markTweetSourceFilter, - ServiceExceptionHandlingFilter serviceExceptionHandlingFilter, - ServiceResponseValidationFilter serviceResponseValidationFilter, - RequestTypeCountFilter requestTypeCountFilter, - RequestContextToEarlybirdRequestFilter requestContextToEarlybirdRequestFilter, - UnsetSuperRootFieldsFilter unsetSuperRootFieldsFilter, - ClientLatencyFilter latencyFilter) { - Service service = - new Service() { - - @Override - public Future apply(EarlybirdRequest requestContext) { - return serviceIface.search(requestContext); - } - }; - - // We should apply ServiceResponseValidationFilter first, to validate the response. - // Then, if the response is valid, we should tag all results with the appropriate tweet source. - // ServiceExceptionHandlingFilter should come last, to catch all possible exceptions (that were - // thrown by the service, or by ServiceResponseValidationFilter and MarkTweetSourceFilter). - // - // But before we do all of this, we should apply the EarlybirdClusterAvailableFilter to see if - // we even need to send the request to this cluster. - return earlybirdClusterAvailableFilter - .andThen(serviceExceptionHandlingFilter) - .andThen(markTweetSourceFilter) - .andThen(serviceResponseValidationFilter) - .andThen(requestTypeCountFilter) - .andThen(requestContextToEarlybirdRequestFilter) - .andThen(latencyFilter) - .andThen(unsetSuperRootFieldsFilter) - .andThen(service); - } - - @Provides - public LoggingSupport provideLoggingSupport( - SearchDecider decider) { - return new EarlybirdServiceLoggingSupport(decider); - } - - @Provides - public ValidationBehavior provideValidationBehavior() { - return new EarlybirdServiceValidationBehavior(); - } - - /** - * Provides the penguin versions that we should use to retokenize the query if requested. - */ - @Provides - @Singleton - public PenguinVersionConfig providePenguinVersions() { - return SearchPenguinVersionsConfig.deserialize(penguinVersionsFlag.apply()); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootRequestTypeRouter.docx b/src/java/com/twitter/search/earlybird_root/SuperRootRequestTypeRouter.docx new file mode 100644 index 000000000..388f6c202 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/SuperRootRequestTypeRouter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootRequestTypeRouter.java b/src/java/com/twitter/search/earlybird_root/SuperRootRequestTypeRouter.java deleted file mode 100644 index 86e84a5c3..000000000 --- a/src/java/com/twitter/search/earlybird_root/SuperRootRequestTypeRouter.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.twitter.search.earlybird_root; - -import java.util.Map; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; - -import com.twitter.finagle.Service; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.ClientErrorException; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; -import com.twitter.search.earlybird_root.routers.FacetsRequestRouter; -import com.twitter.search.earlybird_root.routers.RecencyRequestRouter; -import com.twitter.search.earlybird_root.routers.RelevanceRequestRouter; -import com.twitter.search.earlybird_root.routers.RequestRouter; -import com.twitter.search.earlybird_root.routers.TermStatsRequestRouter; -import com.twitter.search.earlybird_root.routers.TopTweetsRequestRouter; -import com.twitter.util.Future; - -@Singleton -public class SuperRootRequestTypeRouter - extends Service { - - private final Map routingMap; - - /** - * constructor - */ - @Inject - public SuperRootRequestTypeRouter( - FacetsRequestRouter facetsRequestRouter, - TermStatsRequestRouter termStatsRequestRouter, - TopTweetsRequestRouter topTweetsRequestRouter, - RecencyRequestRouter recencyRequestRouter, - RelevanceRequestRouter relevanceRequestRouter - ) { - routingMap = Maps.immutableEnumMap( - ImmutableMap.builder() - .put(EarlybirdRequestType.FACETS, facetsRequestRouter) - .put(EarlybirdRequestType.TERM_STATS, termStatsRequestRouter) - .put(EarlybirdRequestType.TOP_TWEETS, topTweetsRequestRouter) - .put(EarlybirdRequestType.RECENCY, recencyRequestRouter) - .put(EarlybirdRequestType.STRICT_RECENCY, recencyRequestRouter) - .put(EarlybirdRequestType.RELEVANCE, relevanceRequestRouter) - .build()); - } - - @Override - public Future apply(EarlybirdRequestContext requestContext) { - EarlybirdRequest request = requestContext.getRequest(); - if (request.getSearchQuery() == null) { - return Future.exception(new ClientErrorException( - "Client must fill in search Query object in request")); - } - - EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType(); - - if (routingMap.containsKey(requestType)) { - RequestRouter router = routingMap.get(requestType); - return router.route(requestContext); - } else { - return Future.exception( - new ClientErrorException( - "Request type " + requestType + " is unsupported. " - + "Sorry this api is a bit hard to use.\n" - + "for facets, call earlybirdRequest.setFacetsRequest\n" - + "for termstats, call earluybirdRequest.setTermStatisticsRequest\n" - + "for recency, strict recency, relevance or toptweets,\n" - + " call req.setSearchQuery() and req.getSearchQuery().setRankingMode()\n" - + " with the correct ranking mode and for strict recency call\n" - + " earlybirdRequest.setQuerySource(ThriftQuerySource.GNIP)\n")); - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootServer.docx b/src/java/com/twitter/search/earlybird_root/SuperRootServer.docx new file mode 100644 index 000000000..91b9463dd Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/SuperRootServer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootServer.java b/src/java/com/twitter/search/earlybird_root/SuperRootServer.java deleted file mode 100644 index e1e9ba266..000000000 --- a/src/java/com/twitter/search/earlybird_root/SuperRootServer.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import com.twitter.finagle.Service; -import com.twitter.search.common.root.SearchRootServer; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird_root.filters.QueryTokenizerFilter; -import com.twitter.search.queryparser.query.QueryParserException; - -@Singleton -public class SuperRootServer extends SearchRootServer { - private final QueryTokenizerFilter queryTokenizerFilter; - - @Inject - public SuperRootServer( - SuperRootService svc, - Service byteSvc, - QueryTokenizerFilter queryTokenizerFilter) { - super(svc, byteSvc); - - this.queryTokenizerFilter = queryTokenizerFilter; - } - - @Override - public void warmup() { - super.warmup(); - - try { - queryTokenizerFilter.performExpensiveInitialization(); - } catch (QueryParserException e) { - throw new RuntimeException(e); - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootService.docx b/src/java/com/twitter/search/earlybird_root/SuperRootService.docx new file mode 100644 index 000000000..e34d14d63 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/SuperRootService.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/SuperRootService.java b/src/java/com/twitter/search/earlybird_root/SuperRootService.java deleted file mode 100644 index c11052c35..000000000 --- a/src/java/com/twitter/search/earlybird_root/SuperRootService.java +++ /dev/null @@ -1,121 +0,0 @@ -package com.twitter.search.earlybird_root; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import com.twitter.finagle.Service; -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter; -import com.twitter.search.common.clientstats.FinagleClientStatsFilter; -import com.twitter.search.common.root.LoggingFilter; -import com.twitter.search.common.root.RequestValidationFilter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdService; -import com.twitter.search.earlybird.thrift.EarlybirdStatusResponse; -import com.twitter.search.earlybird_root.filters.ClientIdArchiveAccessFilter; -import com.twitter.search.earlybird_root.filters.ClientIdQuotaFilter; -import com.twitter.search.earlybird_root.filters.ClientIdTrackingFilter; -import com.twitter.search.earlybird_root.filters.ClientRequestTimeFilter; -import com.twitter.search.earlybird_root.filters.DeadlineTimeoutStatsFilter; -import com.twitter.search.earlybird_root.filters.DisableClientByTierFilter; -import com.twitter.search.earlybird_root.filters.EarlybirdFeatureSchemaAnnotateFilter; -import com.twitter.search.earlybird_root.filters.InitializeRequestContextFilter; -import com.twitter.search.earlybird_root.filters.MetadataTrackingFilter; -import com.twitter.search.earlybird_root.filters.NamedMultiTermDisjunctionStatsFilter; -import com.twitter.search.earlybird_root.filters.NullcastTrackingFilter; -import com.twitter.search.earlybird_root.filters.PreCacheRequestTypeCountFilter; -import com.twitter.search.earlybird_root.filters.QueryLangStatFilter; -import com.twitter.search.earlybird_root.filters.QueryOperatorStatFilter; -import com.twitter.search.earlybird_root.filters.QueryTokenizerFilter; -import com.twitter.search.earlybird_root.filters.RequestResultStatsFilter; -import com.twitter.search.earlybird_root.filters.RequestSuccessStatsFilter; -import com.twitter.search.earlybird_root.filters.ResponseCodeStatFilter; -import com.twitter.search.earlybird_root.filters.SearchPayloadSizeLocalContextFilter; -import com.twitter.search.earlybird_root.filters.RejectRequestsByQuerySourceFilter; -import com.twitter.search.earlybird_root.filters.StratoAttributionClientIdFilter; -import com.twitter.search.earlybird_root.filters.TopLevelExceptionHandlingFilter; -import com.twitter.search.earlybird_root.filters.VeryRecentTweetsFilter; -import com.twitter.util.Future; - -@Singleton -class SuperRootService implements EarlybirdService.ServiceIface { - private final Service fullSearchMethod; - - @Inject - public SuperRootService( - TopLevelExceptionHandlingFilter topLevelExceptionHandlingFilter, - ResponseCodeStatFilter responseCodeStatFilter, - LoggingFilter loggingFilter, - NamedMultiTermDisjunctionStatsFilter namedMultiTermDisjunctionStatsFilter, - RequestValidationFilter validationFilter, - MtlsServerSessionTrackerFilter mtlsFilter, - FinagleClientStatsFilter finagleStatsFilter, - InitializeFilter initializeFilter, - InitializeRequestContextFilter initializeRequestContextFilter, - QueryLangStatFilter queryLangStatFilter, - QueryOperatorStatFilter queryOperatorStatFilter, - RequestResultStatsFilter requestResultStatsFilter, - PreCacheRequestTypeCountFilter preCacheRequestTypeCountFilter, - ClientIdArchiveAccessFilter clientIdArchiveAccessFilter, - DisableClientByTierFilter disableClientByTierFilter, - ClientIdTrackingFilter clientIdTrackingFilter, - ClientIdQuotaFilter quotaFilter, - RejectRequestsByQuerySourceFilter rejectRequestsByQuerySourceFilter, - MetadataTrackingFilter metadataTrackingFilter, - VeryRecentTweetsFilter veryRecentTweetsFilter, - RequestSuccessStatsFilter requestSuccessStatsFilter, - NullcastTrackingFilter nullcastTrackingFilter, - QueryTokenizerFilter queryTokenizerFilter, - ClientRequestTimeFilter clientRequestTimeFilter, - DeadlineTimeoutStatsFilter deadlineTimeoutStatsFilter, - SuperRootRequestTypeRouter superRootSearchService, - EarlybirdFeatureSchemaAnnotateFilter featureSchemaAnnotateFilter, - SearchPayloadSizeLocalContextFilter searchPayloadSizeLocalContextFilter, - StratoAttributionClientIdFilter stratoAttributionClientIdFilter) { - this.fullSearchMethod = - loggingFilter - .andThen(topLevelExceptionHandlingFilter) - .andThen(stratoAttributionClientIdFilter) - .andThen(clientRequestTimeFilter) - .andThen(searchPayloadSizeLocalContextFilter) - .andThen(requestSuccessStatsFilter) - .andThen(requestResultStatsFilter) - .andThen(responseCodeStatFilter) - .andThen(validationFilter) - .andThen(mtlsFilter) - .andThen(finagleStatsFilter) - .andThen(disableClientByTierFilter) - .andThen(clientIdTrackingFilter) - .andThen(quotaFilter) - .andThen(clientIdArchiveAccessFilter) - .andThen(rejectRequestsByQuerySourceFilter) - .andThen(namedMultiTermDisjunctionStatsFilter) - .andThen(metadataTrackingFilter) - .andThen(veryRecentTweetsFilter) - .andThen(initializeFilter) - .andThen(initializeRequestContextFilter) - .andThen(deadlineTimeoutStatsFilter) - .andThen(queryLangStatFilter) - .andThen(nullcastTrackingFilter) - .andThen(queryOperatorStatFilter) - .andThen(preCacheRequestTypeCountFilter) - .andThen(queryTokenizerFilter) - .andThen(featureSchemaAnnotateFilter) - .andThen(superRootSearchService); - } - - @Override - public Future getName() { - return Future.value("superroot"); - } - - @Override - public Future getStatus() { - throw new UnsupportedOperationException("not supported"); - } - - @Override - public Future search(EarlybirdRequest request) { - return fullSearchMethod.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/BUILD b/src/java/com/twitter/search/earlybird_root/caching/BUILD deleted file mode 100644 index 9ea5a4041..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -java_library( - sources = ["*.java"], - platform = "java8", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/guava", - "3rdparty/jvm/com/google/inject:guice", - "3rdparty/jvm/org/slf4j:slf4j-api", - "finatra/inject/inject-core/src/main/scala", - "src/java/com/twitter/search/common/caching", - "src/java/com/twitter/search/common/decider", - "src/java/com/twitter/search/common/metrics", - "src/java/com/twitter/search/common/root", - "src/java/com/twitter/search/earlybird/common", - "src/java/com/twitter/search/earlybird_root/common", - "src/java/com/twitter/search/queryparser", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/thrift/com/twitter/search:earlybird-java", - ], -) diff --git a/src/java/com/twitter/search/earlybird_root/caching/BUILD.docx b/src/java/com/twitter/search/earlybird_root/caching/BUILD.docx new file mode 100644 index 000000000..3a6ab6a8b Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/BUILD.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/CacheCommonUtil.docx b/src/java/com/twitter/search/earlybird_root/caching/CacheCommonUtil.docx new file mode 100644 index 000000000..ce00c4eaa Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/CacheCommonUtil.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/CacheCommonUtil.java b/src/java/com/twitter/search/earlybird_root/caching/CacheCommonUtil.java deleted file mode 100644 index 6cadf565b..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/CacheCommonUtil.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.earlybird.thrift.EarlybirdResponse; - -public final class CacheCommonUtil { - public static final String NAMED_MAX_CACHE_RESULTS = "maxCacheResults"; - - private CacheCommonUtil() { - } - - public static boolean hasResults(EarlybirdResponse response) { - return response.isSetSearchResults() - && (response.getSearchResults().getResults() != null) - && !response.getSearchResults().getResults().isEmpty(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/CacheStats.docx b/src/java/com/twitter/search/earlybird_root/caching/CacheStats.docx new file mode 100644 index 000000000..6d456ddd0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/CacheStats.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/CacheStats.java b/src/java/com/twitter/search/earlybird_root/caching/CacheStats.java deleted file mode 100644 index 2c0896e0b..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/CacheStats.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.metrics.SearchRateCounter; - -public final class CacheStats { - public static final SearchRateCounter REQUEST_FAILED_COUNTER = - SearchRateCounter.export("memcache_request_failed"); - public static final SearchRateCounter REQUEST_TIMEOUT_COUNTER = - SearchRateCounter.export("memcache_request_timeout"); - - private CacheStats() { - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/DefaultForcedCacheMissDecider.docx b/src/java/com/twitter/search/earlybird_root/caching/DefaultForcedCacheMissDecider.docx new file mode 100644 index 000000000..08fbf2651 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/DefaultForcedCacheMissDecider.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/DefaultForcedCacheMissDecider.java b/src/java/com/twitter/search/earlybird_root/caching/DefaultForcedCacheMissDecider.java deleted file mode 100644 index fc14e2203..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/DefaultForcedCacheMissDecider.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; - -import com.twitter.common.base.Supplier; -import com.twitter.search.common.decider.SearchDecider; - -/** - * A cache miss decider backed by a decider key. - */ -public class DefaultForcedCacheMissDecider implements Supplier { - private static final String DECIDER_KEY = "default_forced_cache_miss_rate"; - private final SearchDecider decider; - - @Inject - public DefaultForcedCacheMissDecider(SearchDecider decider) { - this.decider = decider; - } - - @Override - public Boolean get() { - return decider.isAvailable(DECIDER_KEY); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/EarlybirdCachePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/EarlybirdCachePostProcessor.docx new file mode 100644 index 000000000..d04d4ad95 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/EarlybirdCachePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/EarlybirdCachePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/EarlybirdCachePostProcessor.java deleted file mode 100644 index 04cd08e30..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/EarlybirdCachePostProcessor.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.filter.CachePostProcessor; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class EarlybirdCachePostProcessor - extends CachePostProcessor { - - @Override - public final void recordCacheHit(EarlybirdResponse response) { - response.setCacheHit(true); - } - - @Override - public Optional processCacheResponse(EarlybirdRequestContext originalRequest, - EarlybirdResponse cacheResponse) { - return Optional.of(cacheResponse); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/EarlybirdRequestPerClientCacheStats.docx b/src/java/com/twitter/search/earlybird_root/caching/EarlybirdRequestPerClientCacheStats.docx new file mode 100644 index 000000000..030755f86 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/EarlybirdRequestPerClientCacheStats.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/EarlybirdRequestPerClientCacheStats.java b/src/java/com/twitter/search/earlybird_root/caching/EarlybirdRequestPerClientCacheStats.java deleted file mode 100644 index 2b8d96179..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/EarlybirdRequestPerClientCacheStats.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import com.twitter.search.common.caching.filter.PerClientCacheStats; -import com.twitter.search.common.metrics.SearchRateCounter; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class EarlybirdRequestPerClientCacheStats - extends PerClientCacheStats { - - private String cacheOffByClientStatFormat; - private final Map cacheTurnedOffByClient; - - private String cacheHitsByClientStatFormat; - private final Map cacheHitsByClient; - - public EarlybirdRequestPerClientCacheStats(String cacheRequestType) { - this.cacheOffByClientStatFormat = - cacheRequestType + "_client_id_%s_cache_turned_off_in_request"; - this.cacheTurnedOffByClient = new ConcurrentHashMap<>(); - - this.cacheHitsByClientStatFormat = cacheRequestType + "_client_id_%s_cache_hit_total"; - this.cacheHitsByClient = new ConcurrentHashMap<>(); - } - - @Override - public void recordRequest(EarlybirdRequestContext requestContext) { - if (!EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest())) { - String client = requestContext.getRequest().getClientId(); - SearchRateCounter counter = cacheTurnedOffByClient.computeIfAbsent(client, - cl -> SearchRateCounter.export(String.format(cacheOffByClientStatFormat, cl))); - counter.increment(); - } - } - - @Override - public void recordCacheHit(EarlybirdRequestContext requestContext) { - String client = requestContext.getRequest().getClientId(); - SearchRateCounter counter = cacheHitsByClient.computeIfAbsent(client, - cl -> SearchRateCounter.export(String.format(cacheHitsByClientStatFormat, cl))); - counter.increment(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsCache.docx b/src/java/com/twitter/search/earlybird_root/caching/FacetsCache.docx new file mode 100644 index 000000000..2494c722d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/FacetsCache.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsCache.java b/src/java/com/twitter/search/earlybird_root/caching/FacetsCache.java deleted file mode 100644 index 84b30502e..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/FacetsCache.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import com.google.inject.BindingAnnotation; - -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({ ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD }) -@BindingAnnotation -public @interface FacetsCache { -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheFilter.docx new file mode 100644 index 000000000..fa2df3ce0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheFilter.java deleted file mode 100644 index a06b2eda0..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheFilter.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class FacetsCacheFilter extends - CacheFilter { - /** - * Constructs a new cache filter for facet requests. - */ - @Inject - public FacetsCacheFilter( - @FacetsCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName) { - super(cache, - new FacetsQueryCachePredicate(decider, normalizedSearchRootName), - new FacetsCacheRequestNormalizer(), - new EarlybirdCachePostProcessor(), - new FacetsServicePostProcessor(cache), - new EarlybirdRequestPerClientCacheStats(EarlybirdRequestType.FACETS.getNormalizedName())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheRequestNormalizer.docx b/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheRequestNormalizer.docx new file mode 100644 index 000000000..fc91b3c8a Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheRequestNormalizer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheRequestNormalizer.java b/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheRequestNormalizer.java deleted file mode 100644 index b89be8ad3..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/FacetsCacheRequestNormalizer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.FacetsCacheUtil; -import com.twitter.search.common.caching.filter.CacheRequestNormalizer; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class FacetsCacheRequestNormalizer extends - CacheRequestNormalizer { - - @Override - public Optional normalizeRequest(EarlybirdRequestContext requestContext) { - return Optional.fromNullable(FacetsCacheUtil.normalizeRequestForCache( - requestContext.getRequest())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/FacetsQueryCachePredicate.docx new file mode 100644 index 000000000..15d38bb0e Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/FacetsQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/FacetsQueryCachePredicate.java deleted file mode 100644 index c7cb5c454..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/FacetsQueryCachePredicate.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class FacetsQueryCachePredicate extends QueryCachePredicate { - private final SearchDecider decider; - private final String facetsCacheEnabledDeciderKey; - - public FacetsQueryCachePredicate(SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.facetsCacheEnabledDeciderKey = "facets_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext requestContext) { - return EarlybirdRequestType.FACETS == requestContext.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest()) - && decider.isAvailable(facetsCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsServicePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/FacetsServicePostProcessor.docx new file mode 100644 index 000000000..2a3b7bcf7 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/FacetsServicePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/FacetsServicePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/FacetsServicePostProcessor.java deleted file mode 100644 index 74984a757..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/FacetsServicePostProcessor.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.FacetsCacheUtil; -import com.twitter.search.common.caching.filter.ServicePostProcessor; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class FacetsServicePostProcessor - extends ServicePostProcessor { - - private final Cache cache; - - public FacetsServicePostProcessor(Cache cache) { - this.cache = cache; - } - - @Override - public void processServiceResponse(EarlybirdRequestContext requestContext, - EarlybirdResponse serviceResponse) { - FacetsCacheUtil.cacheResults(requestContext.getRequest(), serviceResponse, cache); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyAndRelevanceCachePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/RecencyAndRelevanceCachePostProcessor.docx new file mode 100644 index 000000000..6312e03ca Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RecencyAndRelevanceCachePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyAndRelevanceCachePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/RecencyAndRelevanceCachePostProcessor.java deleted file mode 100644 index eb0752286..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RecencyAndRelevanceCachePostProcessor.java +++ /dev/null @@ -1,66 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.search.queryparser.util.IdTimeRanges; - -public class RecencyAndRelevanceCachePostProcessor extends EarlybirdCachePostProcessor { - - private static final Logger LOG = - LoggerFactory.getLogger(RecencyAndRelevanceCachePostProcessor.class); - - protected Optional postProcessCacheResponse( - EarlybirdRequest earlybirdRequest, - EarlybirdResponse earlybirdResponse, long sinceID, long maxID) { - return CacheUtil.postProcessCacheResult( - earlybirdRequest, earlybirdResponse, sinceID, maxID); - } - - @Override - public final Optional processCacheResponse( - EarlybirdRequestContext requestContext, - EarlybirdResponse cacheResponse) { - EarlybirdRequest originalRequest = requestContext.getRequest(); - Preconditions.checkArgument(originalRequest.isSetSearchQuery()); - - IdTimeRanges ranges; - Query query = requestContext.getParsedQuery(); - if (query != null) { - try { - ranges = IdTimeRanges.fromQuery(query); - } catch (QueryParserException e) { - LOG.error( - "Exception when parsing since and max IDs. Request: {} Response: {}", - originalRequest, - cacheResponse, - e); - return Optional.absent(); - } - } else { - ranges = null; - } - - Optional sinceID; - Optional maxID; - if (ranges != null) { - sinceID = ranges.getSinceIDExclusive(); - maxID = ranges.getMaxIDInclusive(); - } else { - sinceID = Optional.absent(); - maxID = Optional.absent(); - } - - return postProcessCacheResponse( - originalRequest, cacheResponse, sinceID.or(0L), maxID.or(Long.MAX_VALUE)); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyCache.docx b/src/java/com/twitter/search/earlybird_root/caching/RecencyCache.docx new file mode 100644 index 000000000..c9deb630d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RecencyCache.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyCache.java b/src/java/com/twitter/search/earlybird_root/caching/RecencyCache.java deleted file mode 100644 index 27b9abc72..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RecencyCache.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import com.google.inject.BindingAnnotation; - -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({ ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD }) -@BindingAnnotation -public @interface RecencyCache { -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheFilter.docx new file mode 100644 index 000000000..8a5ef623d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheFilter.java deleted file mode 100644 index 5d4772f70..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheFilter.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class RecencyCacheFilter extends - CacheFilter { - /** - * Creates a cache filter for earlybird recency requests. - */ - @Inject - public RecencyCacheFilter( - @RecencyCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName, - @Named(CacheCommonUtil.NAMED_MAX_CACHE_RESULTS) int maxCacheResults) { - super(cache, - new RecencyQueryCachePredicate(decider, normalizedSearchRootName), - new RecencyCacheRequestNormalizer(), - new RecencyAndRelevanceCachePostProcessor(), - new RecencyServicePostProcessor(cache, maxCacheResults), - new EarlybirdRequestPerClientCacheStats( - EarlybirdRequestType.RECENCY.getNormalizedName())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheRequestNormalizer.docx b/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheRequestNormalizer.docx new file mode 100644 index 000000000..ccdb18da2 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheRequestNormalizer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheRequestNormalizer.java b/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheRequestNormalizer.java deleted file mode 100644 index 11d74370c..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RecencyCacheRequestNormalizer.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.common.caching.filter.CacheRequestNormalizer; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RecencyCacheRequestNormalizer extends - CacheRequestNormalizer { - @Override - public Optional normalizeRequest(EarlybirdRequestContext requestContext) { - return Optional.fromNullable(CacheUtil.normalizeRequestForCache(requestContext.getRequest())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/RecencyQueryCachePredicate.docx new file mode 100644 index 000000000..f7d8f5b2b Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RecencyQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/RecencyQueryCachePredicate.java deleted file mode 100644 index 12778f922..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RecencyQueryCachePredicate.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class RecencyQueryCachePredicate extends QueryCachePredicate { - private final SearchDecider decider; - private final String recencyCacheEnabledDeciderKey; - - public RecencyQueryCachePredicate(SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.recencyCacheEnabledDeciderKey = "recency_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext request) { - return EarlybirdRequestType.RECENCY == request.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(request.getRequest()) - && decider.isAvailable(recencyCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyServicePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/RecencyServicePostProcessor.docx new file mode 100644 index 000000000..36014a3de Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RecencyServicePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RecencyServicePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/RecencyServicePostProcessor.java deleted file mode 100644 index 35ab01e99..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RecencyServicePostProcessor.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.common.caching.filter.ServicePostProcessor; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RecencyServicePostProcessor - extends ServicePostProcessor { - private final Cache cache; - private final int maxCacheResults; - - public RecencyServicePostProcessor( - Cache cache, - int maxCacheResults) { - this.cache = cache; - this.maxCacheResults = maxCacheResults; - } - - @Override - public void processServiceResponse(EarlybirdRequestContext requestContext, - EarlybirdResponse serviceResponse) { - CacheUtil.cacheResults(cache, requestContext.getRequest(), serviceResponse, maxCacheResults); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCache.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCache.docx new file mode 100644 index 000000000..fa5b8528f Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCache.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCache.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCache.java deleted file mode 100644 index b44a7950a..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCache.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import com.google.inject.BindingAnnotation; - -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({ ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD }) -@BindingAnnotation -public @interface RelevanceCache { -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheFilter.docx new file mode 100644 index 000000000..351ba4dde Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheFilter.java deleted file mode 100644 index bd1d718a9..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheFilter.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class RelevanceCacheFilter extends - CacheFilter { - /** - * Creates a cache filter for earlybird relevance requests - */ - @Inject - public RelevanceCacheFilter( - @RelevanceCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName) { - super(cache, - new RelevanceQueryCachePredicate(decider, normalizedSearchRootName), - new RelevanceCacheRequestNormalizer(decider, normalizedSearchRootName), - new RecencyAndRelevanceCachePostProcessor(), - new RelevanceServicePostProcessor(cache), - new EarlybirdRequestPerClientCacheStats( - EarlybirdRequestType.RELEVANCE.getNormalizedName())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheRequestNormalizer.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheRequestNormalizer.docx new file mode 100644 index 000000000..59375016c Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheRequestNormalizer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheRequestNormalizer.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheRequestNormalizer.java deleted file mode 100644 index 6f01eb63d..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceCacheRequestNormalizer.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.common.caching.filter.CacheRequestNormalizer; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RelevanceCacheRequestNormalizer extends - CacheRequestNormalizer { - private static final SearchCounter RELEVANCE_FORCE_CACHED_LOGGED_IN_REQUEST = - SearchCounter.export("relevance_force_cached_logged_in_request"); - - private final SearchDecider decider; - private final String relevanceStripPersonalizationFieldsDeciderKey; - - public RelevanceCacheRequestNormalizer( - SearchDecider decider, - String normalizedSearchRootName) { - this.decider = decider; - this.relevanceStripPersonalizationFieldsDeciderKey = - String.format("relevance_%s_force_cache_logged_in_requests", normalizedSearchRootName); - } - - @Override - public Optional normalizeRequest(EarlybirdRequestContext requestContext) { - boolean cacheLoggedInRequest = - decider.isAvailable(relevanceStripPersonalizationFieldsDeciderKey); - - if (cacheLoggedInRequest) { - RELEVANCE_FORCE_CACHED_LOGGED_IN_REQUEST.increment(); - } - - return Optional.fromNullable(CacheUtil.normalizeRequestForCache( - requestContext.getRequest(), cacheLoggedInRequest)); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceQueryCachePredicate.docx new file mode 100644 index 000000000..f105b1d3c Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceQueryCachePredicate.java deleted file mode 100644 index a7767682e..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceQueryCachePredicate.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class RelevanceQueryCachePredicate extends QueryCachePredicate { - private final SearchDecider decider; - private final String relevanceCacheEnabledDeciderKey; - - public RelevanceQueryCachePredicate(SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.relevanceCacheEnabledDeciderKey = "relevance_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext requestContext) { - return EarlybirdRequestType.RELEVANCE == requestContext.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest()) - && decider.isAvailable(relevanceCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceServicePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceServicePostProcessor.docx new file mode 100644 index 000000000..a337bad68 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceServicePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceServicePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceServicePostProcessor.java deleted file mode 100644 index 7dcaaaf52..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceServicePostProcessor.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.common.caching.filter.ServicePostProcessor; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RelevanceServicePostProcessor - extends ServicePostProcessor { - private final Cache cache; - - public RelevanceServicePostProcessor( - Cache cache) { - this.cache = cache; - } - - @Override - public void processServiceResponse(EarlybirdRequestContext requestContext, - EarlybirdResponse serviceResponse) { - CacheUtil.cacheResults(cache, requestContext.getRequest(), serviceResponse, Integer.MAX_VALUE); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheFilter.docx new file mode 100644 index 000000000..4aabdd373 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheFilter.java deleted file mode 100644 index f0a3b8cf5..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheFilter.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -/** - * A filter that: - * - Strips the request of all personalization fields, normalizes it and looks it up in the cache. - * If it finds a response with 0 results in the cache, it returns it. - * - Caches the response for a personalized query, whenever the response has 0 results. The cache - * key is the normalized request with all personalization fields stripped. - * - * If a query (from a logged in or logged out user) returns 0 results, then the same query will - * always return 0 results, for all users. So we can cache that result. - */ -public class RelevanceZeroResultsCacheFilter - extends CacheFilter { - - /** Creates a filter that caches relevance requests with 0 results. */ - @Inject - public RelevanceZeroResultsCacheFilter( - @RelevanceCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName) { - super(cache, - new RelevanceZeroResultsQueryCachePredicate(decider, normalizedSearchRootName), - new RelevanceZeroResultsCacheRequestNormalizer(), - new RelevanceZeroResultsCachePostProcessor(), - new RelevanceZeroResultsServicePostProcessor(cache), - new EarlybirdRequestPerClientCacheStats("relevance_zero_results")); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCachePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCachePostProcessor.docx new file mode 100644 index 000000000..49db38e48 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCachePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCachePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCachePostProcessor.java deleted file mode 100644 index 41ebeff06..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCachePostProcessor.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; - -public class RelevanceZeroResultsCachePostProcessor extends RecencyAndRelevanceCachePostProcessor { - @Override - protected Optional postProcessCacheResponse( - EarlybirdRequest request, EarlybirdResponse response, long sinceId, long maxId) { - // If a query (from a logged in or logged out user) returns 0 results, then the same query will - // always return 0 results, for all users. So we can cache that result. - if (CacheCommonUtil.hasResults(response)) { - return Optional.absent(); - } - - return Optional.of(response); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheRequestNormalizer.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheRequestNormalizer.docx new file mode 100644 index 000000000..8931adfa0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheRequestNormalizer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheRequestNormalizer.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheRequestNormalizer.java deleted file mode 100644 index 6e5284588..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsCacheRequestNormalizer.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.common.caching.SearchQueryNormalizer; -import com.twitter.search.common.caching.filter.CacheRequestNormalizer; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RelevanceZeroResultsCacheRequestNormalizer - extends CacheRequestNormalizer { - @Override - public Optional normalizeRequest(EarlybirdRequestContext requestContext) { - // If the query is not personalized, it means that: - // - RelevanceCacheRequestNormalizer has already normalized it into a cacheable query. - // - RelevanceCacheFilter could not find a response for this query in the cache. - // - // So if we try to normalize it here again, we will succeed, but then - // RelevanceZeroResultsCacheFilter will do the same look up in the cache, which will again - // result in a cache miss. There is no need to do this look up twice, so if the query is not - // personalized, return Optional.absent(). - // - // If the query is personalized, strip all personalization fields and normalize the request. - if (!SearchQueryNormalizer.queryIsPersonalized(requestContext.getRequest().getSearchQuery())) { - return Optional.absent(); - } - return Optional.fromNullable( - CacheUtil.normalizeRequestForCache(requestContext.getRequest(), true)); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsQueryCachePredicate.docx new file mode 100644 index 000000000..5a1e77629 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsQueryCachePredicate.java deleted file mode 100644 index 8d04bceb2..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsQueryCachePredicate.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class RelevanceZeroResultsQueryCachePredicate - extends QueryCachePredicate { - private final SearchDecider decider; - private final String relevanceCacheEnabledDeciderKey; - private final String relevanceZeroResultsCacheEnabledDeciderKey; - - public RelevanceZeroResultsQueryCachePredicate( - SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.relevanceCacheEnabledDeciderKey = - "relevance_cache_enabled_" + normalizedSearchRootName; - this.relevanceZeroResultsCacheEnabledDeciderKey = - "relevance_zero_results_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext requestContext) { - return EarlybirdRequestType.RELEVANCE == requestContext.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest()) - && decider.isAvailable(relevanceCacheEnabledDeciderKey) - && decider.isAvailable(relevanceZeroResultsCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsServicePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsServicePostProcessor.docx new file mode 100644 index 000000000..836a29224 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsServicePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsServicePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsServicePostProcessor.java deleted file mode 100644 index 6fe7c405f..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/RelevanceZeroResultsServicePostProcessor.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.CacheUtil; -import com.twitter.search.common.caching.filter.ServicePostProcessor; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class RelevanceZeroResultsServicePostProcessor - extends ServicePostProcessor { - - private static final SearchCounter RELEVANCE_RESPONSES_WITH_ZERO_RESULTS_COUNTER = - SearchCounter.export("relevance_responses_with_zero_results"); - - private final Cache cache; - - public RelevanceZeroResultsServicePostProcessor( - Cache cache) { - this.cache = cache; - } - - @Override - public void processServiceResponse(EarlybirdRequestContext requestContext, - EarlybirdResponse serviceResponse) { - // serviceResponse is the response to a personalized query. If it has zero results, then we can - // cache it and reuse it for other requests with the same query. Otherwise, it makes no sense to - // cache this response. - if (!CacheCommonUtil.hasResults(serviceResponse)) { - RELEVANCE_RESPONSES_WITH_ZERO_RESULTS_COUNTER.increment(); - CacheUtil.cacheResults( - cache, requestContext.getRequest(), serviceResponse, Integer.MAX_VALUE); - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCache.docx b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCache.docx new file mode 100644 index 000000000..b945abf6b Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCache.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCache.java b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCache.java deleted file mode 100644 index b56733227..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCache.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import com.google.inject.BindingAnnotation; - -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({ ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD }) -@BindingAnnotation -public @interface StrictRecencyCache { -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCacheFilter.docx new file mode 100644 index 000000000..791d2bc6e Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCacheFilter.java deleted file mode 100644 index 22b5b1023..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyCacheFilter.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class StrictRecencyCacheFilter extends - CacheFilter { - /** - * Creates a cache filter for earlybird strict recency requests. - */ - @Inject - public StrictRecencyCacheFilter( - @StrictRecencyCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName, - @Named(CacheCommonUtil.NAMED_MAX_CACHE_RESULTS) int maxCacheResults) { - super(cache, - new StrictRecencyQueryCachePredicate(decider, normalizedSearchRootName), - new RecencyCacheRequestNormalizer(), - new RecencyAndRelevanceCachePostProcessor(), - new RecencyServicePostProcessor(cache, maxCacheResults), - new EarlybirdRequestPerClientCacheStats( - EarlybirdRequestType.STRICT_RECENCY.getNormalizedName())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyQueryCachePredicate.docx new file mode 100644 index 000000000..af2402f4f Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyQueryCachePredicate.java deleted file mode 100644 index 665b0917f..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/StrictRecencyQueryCachePredicate.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class StrictRecencyQueryCachePredicate extends QueryCachePredicate { - private final SearchDecider decider; - private final String strictRecencyCacheEnabledDeciderKey; - - public StrictRecencyQueryCachePredicate(SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.strictRecencyCacheEnabledDeciderKey = - "strict_recency_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext requestContext) { - return EarlybirdRequestType.STRICT_RECENCY == requestContext.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest()) - && decider.isAvailable(strictRecencyCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCache.docx b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCache.docx new file mode 100644 index 000000000..b1d779ddf Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCache.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCache.java b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCache.java deleted file mode 100644 index 3f3458fbc..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCache.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import com.google.inject.BindingAnnotation; - -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({ ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD }) -@BindingAnnotation -public @interface TermStatsCache { -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheFilter.docx new file mode 100644 index 000000000..1ba03e5b6 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheFilter.java deleted file mode 100644 index 833b8909f..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheFilter.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class TermStatsCacheFilter extends - CacheFilter { - /** - * Constructs a new cache filter for term stats requests. - */ - @Inject - public TermStatsCacheFilter( - @TermStatsCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName) { - super(cache, - new TermStatsQueryCachePredicate(decider, normalizedSearchRootName), - new TermStatsCacheRequestNormalizer(), - new EarlybirdCachePostProcessor(), - new TermStatsServicePostProcessor(cache), - new EarlybirdRequestPerClientCacheStats( - EarlybirdRequestType.TERM_STATS.getNormalizedName())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheRequestNormalizer.docx b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheRequestNormalizer.docx new file mode 100644 index 000000000..ea0f792ae Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheRequestNormalizer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheRequestNormalizer.java b/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheRequestNormalizer.java deleted file mode 100644 index f804a6eb3..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TermStatsCacheRequestNormalizer.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.TermStatsCacheUtil; -import com.twitter.search.common.caching.filter.CacheRequestNormalizer; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class TermStatsCacheRequestNormalizer extends - CacheRequestNormalizer { - - @Override - public Optional normalizeRequest(EarlybirdRequestContext requestContext) { - return Optional.fromNullable(TermStatsCacheUtil.normalizeForCache(requestContext.getRequest())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/TermStatsQueryCachePredicate.docx new file mode 100644 index 000000000..fbc779f82 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TermStatsQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/TermStatsQueryCachePredicate.java deleted file mode 100644 index 34ca56870..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TermStatsQueryCachePredicate.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class TermStatsQueryCachePredicate extends QueryCachePredicate { - private final SearchDecider decider; - private final String termstatsCacheEnabledDeciderKey; - - public TermStatsQueryCachePredicate(SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.termstatsCacheEnabledDeciderKey = "termstats_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext requestContext) { - return EarlybirdRequestType.TERM_STATS == requestContext.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest()) - && decider.isAvailable(termstatsCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsServicePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/TermStatsServicePostProcessor.docx new file mode 100644 index 000000000..888d8636a Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TermStatsServicePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TermStatsServicePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/TermStatsServicePostProcessor.java deleted file mode 100644 index 58e16d371..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TermStatsServicePostProcessor.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Preconditions; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.TermStatsCacheUtil; -import com.twitter.search.common.caching.filter.ServicePostProcessor; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class TermStatsServicePostProcessor - extends ServicePostProcessor { - private final Cache cache; - - public TermStatsServicePostProcessor(Cache cache) { - this.cache = Preconditions.checkNotNull(cache); - } - - @Override - public void processServiceResponse(EarlybirdRequestContext requestContext, - EarlybirdResponse serviceResponse) { - TermStatsCacheUtil.cacheResults(cache, requestContext.getRequest(), serviceResponse); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCache.docx b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCache.docx new file mode 100644 index 000000000..fbaadf934 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCache.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCache.java b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCache.java deleted file mode 100644 index c79413312..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCache.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import com.google.inject.BindingAnnotation; - -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({ ElementType.FIELD, ElementType.PARAMETER, ElementType.METHOD }) -@BindingAnnotation -public @interface TopTweetsCache { -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheFilter.docx b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheFilter.docx new file mode 100644 index 000000000..ae4691944 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheFilter.java b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheFilter.java deleted file mode 100644 index afbbde5a2..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheFilter.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import javax.inject.Inject; -import javax.inject.Named; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.filter.CacheFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.root.SearchRootModule; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class TopTweetsCacheFilter extends - CacheFilter { - /** - * Constructs a new cache filter for top tweets requests. - */ - @Inject - public TopTweetsCacheFilter( - @TopTweetsCache Cache cache, - SearchDecider decider, - @Named(SearchRootModule.NAMED_NORMALIZED_SEARCH_ROOT_NAME) String normalizedSearchRootName) { - super(cache, - new TopTweetsQueryCachePredicate(decider, normalizedSearchRootName), - new TopTweetsCacheRequestNormalizer(), - new EarlybirdCachePostProcessor(), - new TopTweetsServicePostProcessor(cache), - new EarlybirdRequestPerClientCacheStats( - EarlybirdRequestType.TOP_TWEETS.getNormalizedName())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheRequestNormalizer.docx b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheRequestNormalizer.docx new file mode 100644 index 000000000..7629ebf66 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheRequestNormalizer.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheRequestNormalizer.java b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheRequestNormalizer.java deleted file mode 100644 index f790f97c2..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsCacheRequestNormalizer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.google.common.base.Optional; - -import com.twitter.search.common.caching.TopTweetsCacheUtil; -import com.twitter.search.common.caching.filter.CacheRequestNormalizer; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -public class TopTweetsCacheRequestNormalizer extends - CacheRequestNormalizer { - - @Override - public Optional normalizeRequest(EarlybirdRequestContext requestContext) { - return Optional.fromNullable( - TopTweetsCacheUtil.normalizeTopTweetsRequestForCache(requestContext.getRequest())); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsQueryCachePredicate.docx b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsQueryCachePredicate.docx new file mode 100644 index 000000000..f4ee2554c Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsQueryCachePredicate.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsQueryCachePredicate.java b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsQueryCachePredicate.java deleted file mode 100644 index 2e8fda2c6..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsQueryCachePredicate.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import com.twitter.search.common.caching.filter.QueryCachePredicate; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.EarlybirdRequestUtil; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; - -public class TopTweetsQueryCachePredicate extends QueryCachePredicate { - private final SearchDecider decider; - private final String toptweetsCacheEnabledDeciderKey; - - public TopTweetsQueryCachePredicate(SearchDecider decider, String normalizedSearchRootName) { - this.decider = decider; - this.toptweetsCacheEnabledDeciderKey = "toptweets_cache_enabled_" + normalizedSearchRootName; - } - - @Override - public Boolean shouldQueryCache(EarlybirdRequestContext requestContext) { - return EarlybirdRequestType.TOP_TWEETS == requestContext.getEarlybirdRequestType() - && EarlybirdRequestUtil.isCachingAllowed(requestContext.getRequest()) - && decider.isAvailable(toptweetsCacheEnabledDeciderKey); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsServicePostProcessor.docx b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsServicePostProcessor.docx new file mode 100644 index 000000000..9a4fc427d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsServicePostProcessor.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsServicePostProcessor.java b/src/java/com/twitter/search/earlybird_root/caching/TopTweetsServicePostProcessor.java deleted file mode 100644 index 5812404a1..000000000 --- a/src/java/com/twitter/search/earlybird_root/caching/TopTweetsServicePostProcessor.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.search.earlybird_root.caching; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.caching.Cache; -import com.twitter.search.common.caching.TopTweetsCacheUtil; -import com.twitter.search.common.caching.filter.ServicePostProcessor; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; - -import static com.google.common.base.Preconditions.checkNotNull; - -public class TopTweetsServicePostProcessor - extends ServicePostProcessor { - private static final Logger LOG = LoggerFactory.getLogger(TopTweetsServicePostProcessor.class); - - public static final int CACHE_AGE_IN_MS = 600000; - public static final int NO_RESULT_CACHE_AGE_IN_MS = 300000; - - private final Cache cache; - - public TopTweetsServicePostProcessor(Cache cache) { - this.cache = checkNotNull(cache); - } - - @Override - public void processServiceResponse(EarlybirdRequestContext requestContext, - EarlybirdResponse serviceResponse) { - - EarlybirdRequest originalRequest = requestContext.getRequest(); - LOG.debug("Writing to top tweets cache. Request: {}, Response: {}", - originalRequest, serviceResponse); - TopTweetsCacheUtil.cacheResults(originalRequest, - serviceResponse, - cache, - CACHE_AGE_IN_MS, - NO_RESULT_CACHE_AGE_IN_MS); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/collectors/BUILD b/src/java/com/twitter/search/earlybird_root/collectors/BUILD deleted file mode 100644 index bbb90ada1..000000000 --- a/src/java/com/twitter/search/earlybird_root/collectors/BUILD +++ /dev/null @@ -1,12 +0,0 @@ -java_library( - sources = ["*.java"], - platform = "java8", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/guava", - "3rdparty/jvm/log4j", - "src/java/com/twitter/search/common/relevance:utils", - "src/java/com/twitter/search/common/util/earlybird", - "src/thrift/com/twitter/search:earlybird-java", - ], -) diff --git a/src/java/com/twitter/search/earlybird_root/collectors/BUILD.docx b/src/java/com/twitter/search/earlybird_root/collectors/BUILD.docx new file mode 100644 index 000000000..fde3fdf9e Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/collectors/BUILD.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/collectors/MultiwayMergeCollector.docx b/src/java/com/twitter/search/earlybird_root/collectors/MultiwayMergeCollector.docx new file mode 100644 index 000000000..8dd945550 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/collectors/MultiwayMergeCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/collectors/MultiwayMergeCollector.java b/src/java/com/twitter/search/earlybird_root/collectors/MultiwayMergeCollector.java deleted file mode 100644 index 8423007ee..000000000 --- a/src/java/com/twitter/search/earlybird_root/collectors/MultiwayMergeCollector.java +++ /dev/null @@ -1,82 +0,0 @@ -package com.twitter.search.earlybird_root.collectors; - -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.earlybird.thrift.EarlybirdResponse; - -/** - * Generic MultiwayMergeCollector class for doing k-way merge of earlybird responses - * that takes a comparator and returns a list of results sorted by the comparator. - */ -public abstract class MultiwayMergeCollector { - protected static final Logger LOG = LoggerFactory.getLogger(MultiwayMergeCollector.class); - - private final Comparator resultComparator; - private final int numResponsesToMerge; - private final List results = Lists.newArrayList(); - private int numResponsesAdded = 0; - - /** - * Constructor that does multi way merge and takes in a custom predicate search result filter. - */ - public MultiwayMergeCollector(int numResponses, - Comparator comparator) { - Preconditions.checkNotNull(comparator); - this.resultComparator = comparator; - this.numResponsesToMerge = numResponses; - } - - /** - * Add a single response from one partition, updates stats. - * - * @param response response from one partition - */ - public final void addResponse(EarlybirdResponse response) { - // On prod, does it ever happen we receive more responses than numPartitions ? - Preconditions.checkArgument(numResponsesAdded++ < numResponsesToMerge, - String.format("Attempting to merge more than %d responses", numResponsesToMerge)); - if (!isResponseValid(response)) { - return; - } - collectStats(response); - List resultsFromResponse = collectResults(response); - if (resultsFromResponse != null && resultsFromResponse.size() > 0) { - results.addAll(resultsFromResponse); - } - } - - /** - * Parse the EarlybirdResponse and retrieve list of results to be appended. - * - * @param response earlybird response from where results are extracted - * @return resultsList to be appended - */ - protected abstract List collectResults(EarlybirdResponse response); - - /** - * It is recommended that sub-class overrides this function to add custom logic to - * collect more stat and call this base function. - */ - protected void collectStats(EarlybirdResponse response) { - } - - /** - * Get full list of results, after addResponse calls have been invoked. - * - * @return list of results extracted from all EarlybirdResponses that have been collected so far - */ - protected final List getResultsList() { - Collections.sort(results, resultComparator); - return results; - } - - protected abstract boolean isResponseValid(EarlybirdResponse response); -} diff --git a/src/java/com/twitter/search/earlybird_root/collectors/RecencyMergeCollector.docx b/src/java/com/twitter/search/earlybird_root/collectors/RecencyMergeCollector.docx new file mode 100644 index 000000000..885a5f046 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/collectors/RecencyMergeCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/collectors/RecencyMergeCollector.java b/src/java/com/twitter/search/earlybird_root/collectors/RecencyMergeCollector.java deleted file mode 100644 index 42d9bdf6a..000000000 --- a/src/java/com/twitter/search/earlybird_root/collectors/RecencyMergeCollector.java +++ /dev/null @@ -1,75 +0,0 @@ -package com.twitter.search.earlybird_root.collectors; - -import java.util.Comparator; -import java.util.List; - -import com.twitter.search.common.relevance.utils.ResultComparators; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.ThriftSearchResult; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; - -/** - * {@link RecencyMergeCollector} inherits {@link MultiwayMergeCollector} for the type - * {@link com.twitter.search.earlybird.thrift.ThriftSearchResult} as the result type. - *

- * It also implements two public methods to retrieve the top-k or all results. - */ -public class RecencyMergeCollector extends MultiwayMergeCollector { - - // Container for the final results array and also stats like numHitsProcessed etc... - protected final ThriftSearchResults finalResults = new ThriftSearchResults(); - - public RecencyMergeCollector(int numResponses) { - this(numResponses, ResultComparators.ID_COMPARATOR); - } - - protected RecencyMergeCollector(int numResponses, Comparator comparator) { - super(numResponses, comparator); - } - - @Override - protected void collectStats(EarlybirdResponse response) { - super.collectStats(response); - - ThriftSearchResults searchResults = response.getSearchResults(); - if (searchResults.isSetNumHitsProcessed()) { - finalResults.setNumHitsProcessed( - finalResults.getNumHitsProcessed() + searchResults.getNumHitsProcessed()); - } - if (searchResults.isSetNumPartitionsEarlyTerminated()) { - finalResults.setNumPartitionsEarlyTerminated( - finalResults.getNumPartitionsEarlyTerminated() - + searchResults.getNumPartitionsEarlyTerminated()); - } - } - - @Override - protected final List collectResults(EarlybirdResponse response) { - if (response != null - && response.isSetSearchResults() - && response.getSearchResults().getResultsSize() > 0) { - return response.getSearchResults().getResults(); - } else { - return null; - } - } - - /** - * Gets all the results that has been collected. - * - * @return {@link ThriftSearchResults} containing a list of results sorted by provided - * comparator in descending order. - */ - public final ThriftSearchResults getAllSearchResults() { - return finalResults.setResults(getResultsList()); - } - - @Override - protected final boolean isResponseValid(EarlybirdResponse response) { - if (response == null || !response.isSetSearchResults()) { - LOG.warn("searchResults was null: " + response); - return false; - } - return true; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/collectors/RelevanceMergeCollector.docx b/src/java/com/twitter/search/earlybird_root/collectors/RelevanceMergeCollector.docx new file mode 100644 index 000000000..f71fc3095 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/collectors/RelevanceMergeCollector.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/collectors/RelevanceMergeCollector.java b/src/java/com/twitter/search/earlybird_root/collectors/RelevanceMergeCollector.java deleted file mode 100644 index 7331083a1..000000000 --- a/src/java/com/twitter/search/earlybird_root/collectors/RelevanceMergeCollector.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.search.earlybird_root.collectors; - -import com.twitter.search.common.relevance.utils.ResultComparators; -import com.twitter.search.common.util.earlybird.ThriftSearchResultsRelevanceStatsUtil; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.ThriftSearchResultsRelevanceStats; - -/** - * RelevanceMergeCollector class extends (@link RecencyMergeCollector} to do k-way merge of - * earlybird responses, but sorted by relevance score. - * - * Note that this is a superset of functionality found in - * {@link com.twitter.search.blender.services.earlybird.relevance.RelevanceCollector} - * If you make changes here, evaluate if they should be made in RelevanceCollector as well. - */ -public class RelevanceMergeCollector extends RecencyMergeCollector { - - public RelevanceMergeCollector(int numResponses) { - super(numResponses, ResultComparators.SCORE_COMPARATOR); - } - - @Override - protected void collectStats(EarlybirdResponse response) { - super.collectStats(response); - - if (!response.getSearchResults().isSetRelevanceStats()) { - return; - } - - if (!finalResults.isSetRelevanceStats()) { - finalResults.setRelevanceStats(new ThriftSearchResultsRelevanceStats()); - } - - ThriftSearchResultsRelevanceStats base = finalResults.getRelevanceStats(); - ThriftSearchResultsRelevanceStats delta = response.getSearchResults().getRelevanceStats(); - - ThriftSearchResultsRelevanceStatsUtil.addRelevanceStats(base, delta); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/BUILD b/src/java/com/twitter/search/earlybird_root/common/BUILD deleted file mode 100644 index 57ad0ffa4..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -java_library( - sources = ["*.java"], - platform = "java8", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/guava", - "3rdparty/jvm/com/google/inject:guice", - "3rdparty/jvm/commons-lang", - "3rdparty/jvm/org/slf4j:slf4j-api", - "src/java/com/twitter/common/util:system-mocks", - "src/java/com/twitter/search/common/decider", - "src/java/com/twitter/search/common/metrics", - "src/java/com/twitter/search/common/partitioning/snowflakeparser", - "src/java/com/twitter/search/queryparser", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/search:earlybird-java", - "src/thrift/com/twitter/search/common:constants-java", - "src/thrift/com/twitter/search/common:features-java", - "twitter-context/src/main/scala", - ], -) diff --git a/src/java/com/twitter/search/earlybird_root/common/BUILD.docx b/src/java/com/twitter/search/earlybird_root/common/BUILD.docx new file mode 100644 index 000000000..d0f84629b Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/BUILD.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/ClientErrorException.docx b/src/java/com/twitter/search/earlybird_root/common/ClientErrorException.docx new file mode 100644 index 000000000..667008d8e Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/ClientErrorException.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/ClientErrorException.java b/src/java/com/twitter/search/earlybird_root/common/ClientErrorException.java deleted file mode 100644 index 98c2bb011..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/ClientErrorException.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -public class ClientErrorException extends RuntimeException { - - public ClientErrorException() { - } - - public ClientErrorException(String message) { - super(message); - } - - public ClientErrorException(String message, Throwable cause) { - super(message, cause); - } - - public ClientErrorException(Throwable cause) { - super(cause); - } - - public ClientErrorException(String message, Throwable cause, - boolean enableSuppression, boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.docx b/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.docx new file mode 100644 index 000000000..503f1df5f Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java b/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java deleted file mode 100644 index f91d2d3c4..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java +++ /dev/null @@ -1,377 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; - -import javax.annotation.concurrent.ThreadSafe; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Maps; - -import org.apache.commons.lang.mutable.MutableInt; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.features.thrift.ThriftSearchFeatureSchema; -import com.twitter.search.common.features.thrift.ThriftSearchFeatureSchemaSpecifier; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchLongGauge; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; - -@ThreadSafe -public class EarlybirdFeatureSchemaMerger { - private static final Logger LOG = LoggerFactory.getLogger(EarlybirdFeatureSchemaMerger.class); - - private static final SearchLongGauge NUM_FEATURE_SCHEMAS_MAP = SearchLongGauge.export( - "earlybird_feature_schema_cached_cnt"); - - private class Stats { - public final SearchCounter fieldFormatResponses; - public final SearchCounter mapFormatResponses; - public final SearchCounter mapFormatSavedSchemaResponses; - public final SearchCounter mapFormatAllDownstreamMissingSchema; - public final SearchCounter mapFormatOneDownstreamMissingSchema; - public final SearchCounter mapFormatSchemaCachedMismatch; - public final SearchCounter numInvalidRankingModeRequests; - public final SearchCounter numEmptyResponses; - - public Stats(String prefix) { - this.fieldFormatResponses = - SearchCounter.export( - "earlybird_feature_schema_" + prefix + "_field_format_feature_responses"); - this.mapFormatResponses = - SearchCounter.export( - "earlybird_feature_schema_" + prefix + "_map_format_feature_responses"); - this.mapFormatSavedSchemaResponses = - SearchCounter.export( - "earlybird_feature_schema_" + prefix + "_map_format_feature_saved_schema_responses"); - this.mapFormatAllDownstreamMissingSchema = - SearchCounter.export( - "earlybird_feature_schema_" + prefix - + "_map_format_feature_all_downstream_missing_schema_error"); - this.mapFormatOneDownstreamMissingSchema = - SearchCounter.export( - "earlybird_feature_schema_" + prefix - + "_map_format_feature_one_downstream_missing_schema_error"); - this.mapFormatSchemaCachedMismatch = - SearchCounter.export( - "earlybird_feature_schema_" + prefix - + "_map_format_feature_schema_cached_mismatch_error"); - this.numInvalidRankingModeRequests = - SearchCounter.export( - "earlybird_feature_schema_" + prefix + "_num_invalid_ranking_mode_requests"); - this.numEmptyResponses = - SearchCounter.export( - "earlybird_feature_schema_" + prefix - + "_num_empty_response_without_schema"); - } - } - - private final ConcurrentHashMap - featureSchemas = new ConcurrentHashMap<>(); - private final ConcurrentHashMap mergeStats = new ConcurrentHashMap<>(); - - /** - * Get all available cache schema list indicated by the schema specifier. - * @return identifiers for all the cached schema - */ - public List getAvailableSchemaList() { - return ImmutableList.copyOf(featureSchemas.keySet()); - } - - /** - * Iterate all the responses and collect and cache feature schemas from response. - * Set the feature schema for the response in searchResults if needed. - * (This is done inside earlybird roots) - * - * @param searchResults the response - * @param requestContext the request, which should record the client cached feature schemas - * @param statPrefix the stats prefix string - * @param successfulResponses all successfull responses from downstream - */ - public void collectAndSetFeatureSchemaInResponse( - ThriftSearchResults searchResults, - EarlybirdRequestContext requestContext, - String statPrefix, - List successfulResponses) { - Stats stats = getOrCreateMergeStat(statPrefix); - EarlybirdRequest request = requestContext.getRequest(); - if (!request.isSetSearchQuery() - || !request.getSearchQuery().isSetResultMetadataOptions() - || !request.getSearchQuery().getResultMetadataOptions().isReturnSearchResultFeatures()) { - // If the client does not want to get all features in map format, do not do anything. - stats.fieldFormatResponses.increment(); - return; - } - - // Find the most occurred schema from per-merge responses and return it in the post-merge - // response. - ThriftSearchFeatureSchemaSpecifier schemaMostOccurred = findMostOccurredSchema( - stats, request, successfulResponses); - if (schemaMostOccurred == null) { - return; - } - - Set availableSchemasInClient = - requestContext.getFeatureSchemasAvailableInClient(); - if (availableSchemasInClient != null && availableSchemasInClient.contains(schemaMostOccurred)) { - // The client already knows the schema that we used for this response, so we don't need to - // send it the full schema, just the ThriftSearchFeatureSchemaSpecifier. - ThriftSearchFeatureSchema schema = new ThriftSearchFeatureSchema(); - schema.setSchemaSpecifier(schemaMostOccurred); - searchResults.setFeatureSchema(schema); - stats.mapFormatResponses.increment(); - stats.mapFormatSavedSchemaResponses.increment(); - } else { - ThriftSearchFeatureSchema schema = featureSchemas.get(schemaMostOccurred); - if (schema != null) { - Preconditions.checkState(schema.isSetEntries()); - Preconditions.checkState(schema.isSetSchemaSpecifier()); - searchResults.setFeatureSchema(schema); - stats.mapFormatResponses.increment(); - } else { - stats.mapFormatSchemaCachedMismatch.increment(); - LOG.error("The feature schema cache misses the schema entry {} it should cache for {}", - schemaMostOccurred, request); - } - } - } - - /** - * Merge the feature schema from each cluster's response and return it to the client. - * (This is done inside superroot) - * @param requestContext the search request context - * @param mergedResponse the merged result inside the superroot - * @param realtimeResponse the realtime tier resposne - * @param protectedResponse the protected tier response - * @param fullArchiveResponse the full archive tier response - * @param statsPrefix - */ - public void mergeFeatureSchemaAcrossClusters( - EarlybirdRequestContext requestContext, - EarlybirdResponse mergedResponse, - String statsPrefix, - EarlybirdResponse realtimeResponse, - EarlybirdResponse protectedResponse, - EarlybirdResponse fullArchiveResponse) { - Stats superrootStats = getOrCreateMergeStat(statsPrefix); - - // Only try to merge feature schema if there are search results. - ThriftSearchResults mergedResults = Preconditions.checkNotNull( - mergedResponse.getSearchResults()); - if (mergedResults.getResults().isEmpty()) { - mergedResults.unsetFeatureSchema(); - superrootStats.numEmptyResponses.increment(); - return; - } - - EarlybirdRequest request = requestContext.getRequest(); - if (!request.isSetSearchQuery() - || !request.getSearchQuery().isSetResultMetadataOptions() - || !request.getSearchQuery().getResultMetadataOptions().isReturnSearchResultFeatures()) { - mergedResults.unsetFeatureSchema(); - - // If the client does not want to get all features in map format, do not do anything. - superrootStats.fieldFormatResponses.increment(); - return; - } - if (request.getSearchQuery().getRankingMode() != ThriftSearchRankingMode.RELEVANCE - && request.getSearchQuery().getRankingMode() != ThriftSearchRankingMode.TOPTWEETS - && request.getSearchQuery().getRankingMode() != ThriftSearchRankingMode.RECENCY) { - mergedResults.unsetFeatureSchema(); - - // Only RELEVANCE, TOPTWEETS and RECENCY requests might need a feature schema in the response. - superrootStats.numInvalidRankingModeRequests.increment(); - LOG.warn("Request asked for feature schema, but has incorrect ranking mode: {}", request); - return; - } - superrootStats.mapFormatResponses.increment(); - - ThriftSearchFeatureSchema schema = updateReturnSchemaForClusterResponse( - null, realtimeResponse, request, superrootStats); - schema = updateReturnSchemaForClusterResponse( - schema, protectedResponse, request, superrootStats); - schema = updateReturnSchemaForClusterResponse( - schema, fullArchiveResponse, request, superrootStats); - - if (schema != null) { - if (requestContext.getFeatureSchemasAvailableInClient() != null - && requestContext.getFeatureSchemasAvailableInClient().contains( - schema.getSchemaSpecifier())) { - mergedResults.setFeatureSchema( - new ThriftSearchFeatureSchema().setSchemaSpecifier(schema.getSchemaSpecifier())); - } else { - mergedResults.setFeatureSchema(schema); - } - } else { - superrootStats.mapFormatAllDownstreamMissingSchema.increment(); - LOG.error("The response for request {} is missing feature schema from all clusters", request); - } - } - - /** - * Add the schema to both the schema map and and the schema list if it is not there yet. - * - * @param schema the feature schema for search results - */ - private void addNewSchema(ThriftSearchFeatureSchema schema) { - if (!schema.isSetEntries() - || !schema.isSetSchemaSpecifier() - || featureSchemas.containsKey(schema.getSchemaSpecifier())) { - return; - } - - synchronized (this) { - String oldExportedSchemaName = null; - if (!featureSchemas.isEmpty()) { - oldExportedSchemaName = getExportSchemasName(); - } - - if (featureSchemas.putIfAbsent(schema.getSchemaSpecifier(), schema) == null) { - LOG.info("Add new feature schema {} into the list", schema); - NUM_FEATURE_SCHEMAS_MAP.set(featureSchemas.size()); - - if (oldExportedSchemaName != null) { - SearchLongGauge.export(oldExportedSchemaName).reset(); - } - SearchLongGauge.export(getExportSchemasName()).set(1); - LOG.info("Expanded feature schema: {}", ImmutableList.copyOf(featureSchemas.keySet())); - } - } - } - - private String getExportSchemasName() { - StringBuilder builder = new StringBuilder("earlybird_feature_schema_cached"); - TreeSet exportedVersions = new TreeSet<>(); - - // We do not need checksum for exported vars as all cached schemas are from the majority of the - // responses. - featureSchemas.keySet().stream().forEach(key -> exportedVersions.add(key.getVersion())); - exportedVersions.stream().forEach(version -> { - builder.append('_'); - builder.append(version); - }); - return builder.toString(); - } - - // Get the updated the feature schema based on the earlybird response from the search cluster. - // . If the existingSchema is not null, the function would return the existing schema. Under the - // situation, we would still check whether the feature in earlybird response is valid. - // . Otherwise, the function would extract the feature schema from the earlybird response. - private ThriftSearchFeatureSchema updateReturnSchemaForClusterResponse( - ThriftSearchFeatureSchema existingSchema, - EarlybirdResponse clusterResponse, - EarlybirdRequest request, - Stats stats) { - // If there is no response or search result for this cluster, do not update returned schema. - if ((clusterResponse == null) || !clusterResponse.isSetSearchResults()) { - return existingSchema; - } - ThriftSearchResults results = clusterResponse.getSearchResults(); - if (results.getResults().isEmpty()) { - return existingSchema; - } - - if (!results.isSetFeatureSchema() || !results.getFeatureSchema().isSetSchemaSpecifier()) { - stats.mapFormatOneDownstreamMissingSchema.increment(); - LOG.error("The downstream response {} is missing feature schema for request {}", - clusterResponse, request); - return existingSchema; - } - - ThriftSearchFeatureSchema schema = results.getFeatureSchema(); - - // Even if existingSchema is already set, we would still try to cache the returned schema. - // In this way, the next time earlybird roots don't have to send the full schema back again. - if (schema.isSetEntries()) { - addNewSchema(schema); - } else if (featureSchemas.containsKey(schema.getSchemaSpecifier())) { - stats.mapFormatSavedSchemaResponses.increment(); - } else { - stats.mapFormatSchemaCachedMismatch.increment(); - LOG.error( - "The feature schema cache misses the schema entry {}, it should cache {} in {}", - schema.getSchemaSpecifier(), request, clusterResponse); - } - - ThriftSearchFeatureSchema updatedSchema = existingSchema; - if (updatedSchema == null) { - updatedSchema = featureSchemas.get(schema.getSchemaSpecifier()); - if (updatedSchema != null) { - Preconditions.checkState(updatedSchema.isSetEntries()); - Preconditions.checkState(updatedSchema.isSetSchemaSpecifier()); - } - } - return updatedSchema; - } - - private ThriftSearchFeatureSchemaSpecifier findMostOccurredSchema( - Stats stats, - EarlybirdRequest request, - List successfulResponses) { - boolean hasResults = false; - Map schemaCount = - Maps.newHashMapWithExpectedSize(successfulResponses.size()); - for (EarlybirdResponse response : successfulResponses) { - if (!response.isSetSearchResults() - || response.getSearchResults().getResultsSize() == 0) { - continue; - } - - hasResults = true; - if (response.getSearchResults().isSetFeatureSchema()) { - ThriftSearchFeatureSchema schema = response.getSearchResults().getFeatureSchema(); - if (schema.isSetSchemaSpecifier()) { - MutableInt cnt = schemaCount.get(schema.getSchemaSpecifier()); - if (cnt != null) { - cnt.increment(); - } else { - schemaCount.put(schema.getSchemaSpecifier(), new MutableInt(1)); - } - - if (schema.isSetEntries()) { - addNewSchema(schema); - } - } - } else { - stats.mapFormatOneDownstreamMissingSchema.increment(); - LOG.error("The downstream response {} is missing feature schema for request {}", - response, request); - } - } - - int numMostOccurred = 0; - ThriftSearchFeatureSchemaSpecifier schemaMostOccurred = null; - for (Map.Entry entry : schemaCount.entrySet()) { - if (entry.getValue().toInteger() > numMostOccurred) { - numMostOccurred = entry.getValue().toInteger(); - schemaMostOccurred = entry.getKey(); - } - } - - if (schemaMostOccurred == null && hasResults) { - stats.mapFormatAllDownstreamMissingSchema.increment(); - LOG.error("None of the downstream host returned feature schema for {}", request); - } - return schemaMostOccurred; - } - - private Stats getOrCreateMergeStat(String statPrefix) { - Stats stats = mergeStats.get(statPrefix); - if (stats == null) { - Stats newStats = new Stats(statPrefix); - stats = mergeStats.putIfAbsent(statPrefix, newStats); - if (stats == null) { - stats = newStats; - } - } - return stats; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestContext.docx b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestContext.docx new file mode 100644 index 000000000..f945aeb7c Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestContext.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestContext.java b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestContext.java deleted file mode 100644 index a9d2840ca..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestContext.java +++ /dev/null @@ -1,227 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import javax.annotation.Nullable; - -import scala.Option; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; - -import com.twitter.common.util.Clock; -import com.twitter.context.thriftscala.Viewer; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.features.thrift.ThriftSearchFeatureSchemaSpecifier; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.ThriftSearchQuery; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; - -/** - * A class that wraps a request and additional per-request data that should be passed to services. - * - * This class should be immutable. At the very least, it must be thread-safe. In practice, since - * EarlybirdRequest is a mutable Thrift structure, the users of this class need to make sure that - * once a request is used to create a RequestContext instance, it is not modified. - * - * If the request needs to be modified, a new RequestContext with the modified EarlybirdRequest - * should be created. - */ -public final class EarlybirdRequestContext { - - private static final String OVERRIDE_TIER_CONFIGS_DECIDER_KEY = "use_override_tier_configs"; - - /** - * Creates a new context with the provided earlybird request, and using the given decider. - */ - public static EarlybirdRequestContext newContext( - EarlybirdRequest request, - SearchDecider decider, - Option twitterContextViewer, - Clock clock) throws QueryParserException { - - // Try to capture created time as early as possible. For example, we want to account for query - // parsing time. - long createdTimeMillis = clock.nowMillis(); - - boolean useOverrideTierConfig = decider.isAvailable(OVERRIDE_TIER_CONFIGS_DECIDER_KEY); - - Query parsedQuery = QueryParsingUtils.getParsedQuery(request); - - return new EarlybirdRequestContext( - request, - parsedQuery, - useOverrideTierConfig, - createdTimeMillis, - twitterContextViewer); - } - - /** - * Intersection of the userID and the flock response, which is set in the followedUserIds field. - * This is used for protected cluster. - */ - public static EarlybirdRequestContext newContextWithRestrictFromUserIdFilter64( - EarlybirdRequestContext requestContext) { - Preconditions.checkArgument(requestContext.getRequest().isSetFollowedUserIds()); - - EarlybirdRequest request = requestContext.getRequest().deepCopy(); - List toIntersect = request.getFollowedUserIds(); - ThriftSearchQuery searchQuery = request.getSearchQuery(); - if (!searchQuery.isSetFromUserIDFilter64()) { - searchQuery.setFromUserIDFilter64(new ArrayList<>(toIntersect)); - } else { - Set intersection = Sets.intersection( - Sets.newHashSet(searchQuery.getFromUserIDFilter64()), - Sets.newHashSet(toIntersect)); - searchQuery.setFromUserIDFilter64(new ArrayList<>(intersection)); - } - - return new EarlybirdRequestContext(requestContext, request, requestContext.getParsedQuery()); - } - - /** - * Makes an exact copy of the provided request context, by cloning the underlying earlybird - * request. - */ - public static EarlybirdRequestContext copyRequestContext( - EarlybirdRequestContext requestContext, - Query parsedQuery) { - return new EarlybirdRequestContext(requestContext, parsedQuery); - } - - /** - * Creates a new context with the provided request, context and reset both the feature schemas - * cached in client and the feature schemas cached in the local cache. - */ - public static EarlybirdRequestContext newContext( - EarlybirdRequest oldRequest, - EarlybirdRequestContext oldRequestContext, - List featureSchemasAvailableInCache, - List featureSchemasAvailableInClient) { - EarlybirdRequest request = oldRequest.deepCopy(); - request.getSearchQuery().getResultMetadataOptions() - .setFeatureSchemasAvailableInClient(featureSchemasAvailableInCache); - - ImmutableSet featureSchemaSetAvailableInClient = null; - if (featureSchemasAvailableInClient != null) { - featureSchemaSetAvailableInClient = ImmutableSet.copyOf(featureSchemasAvailableInClient); - } - - return new EarlybirdRequestContext( - request, - EarlybirdRequestType.of(request), - oldRequestContext.getParsedQuery(), - oldRequestContext.useOverrideTierConfig(), - oldRequestContext.getCreatedTimeMillis(), - oldRequestContext.getTwitterContextViewer(), - featureSchemaSetAvailableInClient); - } - - public EarlybirdRequestContext deepCopy() { - return new EarlybirdRequestContext(request.deepCopy(), parsedQuery, useOverrideTierConfig, - createdTimeMillis, twitterContextViewer); - } - - private final EarlybirdRequest request; - // EarlybirdRequestType should not change for a given request. Computing it once here so that we - // don't need to compute it from the request every time we want to use it. - private final EarlybirdRequestType earlybirdRequestType; - // The parsed query matching the serialized query in the request. May be null if the request does - // not contain a serialized query. - // If a request's serialized query needs to be rewritten for any reason, a new - // EarlybirdRequestContext should be created, with a new EarlybirdRequest (with a new serialized - // query), and a new parsed query (matching the new serialized query). - @Nullable - private final Query parsedQuery; - private final boolean useOverrideTierConfig; - private final long createdTimeMillis; - private final Option twitterContextViewer; - - @Nullable - private final ImmutableSet featureSchemasAvailableInClient; - - private EarlybirdRequestContext( - EarlybirdRequest request, - Query parsedQuery, - boolean useOverrideTierConfig, - long createdTimeMillis, - Option twitterContextViewer) { - this(request, - EarlybirdRequestType.of(request), - parsedQuery, - useOverrideTierConfig, - createdTimeMillis, - twitterContextViewer, - null); - } - - private EarlybirdRequestContext( - EarlybirdRequest request, - EarlybirdRequestType earlybirdRequestType, - Query parsedQuery, - boolean useOverrideTierConfig, - long createdTimeMillis, - Option twitterContextViewer, - @Nullable ImmutableSet featureSchemasAvailableInClient) { - this.request = Preconditions.checkNotNull(request); - this.earlybirdRequestType = earlybirdRequestType; - this.parsedQuery = parsedQuery; - this.useOverrideTierConfig = useOverrideTierConfig; - this.createdTimeMillis = createdTimeMillis; - this.twitterContextViewer = twitterContextViewer; - this.featureSchemasAvailableInClient = featureSchemasAvailableInClient; - } - - private EarlybirdRequestContext(EarlybirdRequestContext otherContext, Query otherParsedQuery) { - this(otherContext, otherContext.getRequest().deepCopy(), otherParsedQuery); - } - - private EarlybirdRequestContext(EarlybirdRequestContext otherContext, - EarlybirdRequest otherRequest, - Query otherParsedQuery) { - this(otherRequest, - otherContext.earlybirdRequestType, - otherParsedQuery, - otherContext.useOverrideTierConfig, - otherContext.createdTimeMillis, - otherContext.twitterContextViewer, - null); - - Preconditions.checkState(request.isSetSearchQuery()); - this.request.getSearchQuery().setSerializedQuery(otherParsedQuery.serialize()); - } - - public EarlybirdRequest getRequest() { - return request; - } - - public boolean useOverrideTierConfig() { - return useOverrideTierConfig; - } - - public EarlybirdRequestType getEarlybirdRequestType() { - return earlybirdRequestType; - } - - @Nullable - public Query getParsedQuery() { - return parsedQuery; - } - - public long getCreatedTimeMillis() { - return createdTimeMillis; - } - - public Option getTwitterContextViewer() { - return twitterContextViewer; - } - - @Nullable - public Set getFeatureSchemasAvailableInClient() { - return featureSchemasAvailableInClient; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestType.docx b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestType.docx new file mode 100644 index 000000000..bb6cb1ef3 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestType.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestType.java b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestType.java deleted file mode 100644 index 6082135dc..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestType.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import javax.annotation.Nonnull; - -import com.twitter.search.common.constants.thriftjava.ThriftQuerySource; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode; - -/** - * Earlybird roots distinguish these types of requests and treat them differently. - */ -public enum EarlybirdRequestType { - FACETS, - RECENCY, - RELEVANCE, - STRICT_RECENCY, - TERM_STATS, - TOP_TWEETS; - - /** - * Returns the type of the given requests. - */ - @Nonnull - public static EarlybirdRequestType of(EarlybirdRequest request) { - if (request.isSetFacetRequest()) { - return FACETS; - } else if (request.isSetTermStatisticsRequest()) { - return TERM_STATS; - } else if (request.isSetSearchQuery() && request.getSearchQuery().isSetRankingMode()) { - ThriftSearchRankingMode rankingMode = request.getSearchQuery().getRankingMode(); - switch (rankingMode) { - case RECENCY: - if (shouldUseStrictRecency(request)) { - return STRICT_RECENCY; - } else { - return RECENCY; - } - case RELEVANCE: - return RELEVANCE; - case TOPTWEETS: - return TOP_TWEETS; - default: - throw new IllegalArgumentException(); - } - } else { - throw new UnsupportedOperationException(); - } - } - - private static boolean shouldUseStrictRecency(EarlybirdRequest request) { - // For now, we decide to do strict merging solely based on the QuerySource, and only for GNIP. - return request.isSetQuerySource() && request.getQuerySource() == ThriftQuerySource.GNIP; - } - - private final String normalizedName; - - EarlybirdRequestType() { - this.normalizedName = name().toLowerCase(); - } - - /** - * Returns the "normalized" name of this request type, that can be used for stat and decider - * names. - */ - public String getNormalizedName() { - return normalizedName; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestUtil.docx b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestUtil.docx new file mode 100644 index 000000000..96f675ac2 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestUtil.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestUtil.java b/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestUtil.java deleted file mode 100644 index 430aa870c..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/EarlybirdRequestUtil.java +++ /dev/null @@ -1,107 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import com.google.common.base.Optional; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.search.queryparser.util.IdTimeRanges; - -public final class EarlybirdRequestUtil { - private static final Logger LOG = LoggerFactory.getLogger(EarlybirdRequestUtil.class); - - private EarlybirdRequestUtil() { - } - - /** - * Returns the max ID specified in the query. The max ID is determined based on the max_id - * operator, and the returned value is an inclusive max ID (that is, the returned response is - * allowed to have a tweet with this ID). - * - * If the query is null, could not be parsed or does not have a max_id operator, Optional.absent() - * is returned. - * - * @param query The query. - * @return The max ID specified in the given query (based on the max_id operator). - */ - public static Optional getRequestMaxId(Query query) { - if (query == null) { - return Optional.absent(); - } - - IdTimeRanges idTimeRanges = null; - try { - idTimeRanges = IdTimeRanges.fromQuery(query); - } catch (QueryParserException e) { - LOG.warn("Exception while getting max_id/until_time from query: " + query, e); - } - - if (idTimeRanges == null) { - // An exception was thrown or the query doesn't accept the boundary operators. - return Optional.absent(); - } - - return idTimeRanges.getMaxIDInclusive(); - } - - /** - * Returns the max ID specified in the query, based on the until_time operator. The returned ID - * is inclusive (that is, the returned response is allowed to have a tweet with this ID). - * - * If the query is null, could not be parsed or does not have an until_time operator, - * Optional.absent() is returned. - * - * @param query The query. - * @return The max ID specified in the given query (based on the until_time operator). - */ - public static Optional getRequestMaxIdFromUntilTime(Query query) { - if (query == null) { - return Optional.absent(); - } - - IdTimeRanges idTimeRanges = null; - try { - idTimeRanges = IdTimeRanges.fromQuery(query); - } catch (QueryParserException e) { - LOG.warn("Exception while getting max_id/until_time from query: " + query, e); - } - - if (idTimeRanges == null) { - // An exception was thrown or the query doesn't accept the boundary operators. - return Optional.absent(); - } - - Optional queryUntilTimeExclusive = idTimeRanges.getUntilTimeExclusive(); - Optional maxId = Optional.absent(); - if (queryUntilTimeExclusive.isPresent()) { - long timestampMillis = queryUntilTimeExclusive.get() * 1000L; - if (SnowflakeIdParser.isUsableSnowflakeTimestamp(timestampMillis)) { - // Convert timestampMillis to an ID, and subtract 1, because the until_time operator is - // exclusive, and we need to return an inclusive max ID. - maxId = Optional.of(SnowflakeIdParser.generateValidStatusId(timestampMillis, 0) - 1); - } - } - return maxId; - } - - /** - * Creates a copy of the given EarlybirdRequest and unsets all fields that are used - * only by the SuperRoot. - */ - public static EarlybirdRequest unsetSuperRootFields( - EarlybirdRequest request, boolean unsetFollowedUserIds) { - EarlybirdRequest newRequest = request.deepCopy(); - newRequest.unsetGetOlderResults(); - newRequest.unsetGetProtectedTweetsOnly(); - if (unsetFollowedUserIds) { - newRequest.unsetFollowedUserIds(); - } - newRequest.unsetAdjustedProtectedRequestParams(); - newRequest.unsetAdjustedFullArchiveRequestParams(); - return newRequest; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdServiceResponse.docx b/src/java/com/twitter/search/earlybird_root/common/EarlybirdServiceResponse.docx new file mode 100644 index 000000000..d951797a6 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/EarlybirdServiceResponse.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdServiceResponse.java b/src/java/com/twitter/search/earlybird_root/common/EarlybirdServiceResponse.java deleted file mode 100644 index 476cedc72..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/EarlybirdServiceResponse.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import javax.annotation.Nullable; - -import com.google.common.base.Preconditions; - -import com.twitter.search.earlybird.thrift.EarlybirdResponse; - -/** - * A class that wraps an EarlybirdResponse and a flag that determines if a request was sent to a - * service. - */ -public final class EarlybirdServiceResponse { - public static enum ServiceState { - // The service was called (or will be called). - SERVICE_CALLED(true), - - // The service is not available (turned off by a decider, for example). - SERVICE_NOT_AVAILABLE(false), - - // The client did not request results from this service. - SERVICE_NOT_REQUESTED(false), - - // The service is available and the client wants results from this service, but the service - // was not called (because we got enough results from other services, for example). - SERVICE_NOT_CALLED(false); - - private final boolean serviceWasCalled; - - private ServiceState(boolean serviceWasCalled) { - this.serviceWasCalled = serviceWasCalled; - } - - public boolean serviceWasCalled() { - return serviceWasCalled; - } - - public boolean serviceWasRequested() { - return this != SERVICE_NOT_REQUESTED; - } - - } - - private final EarlybirdResponse earlybirdResponse; - private final ServiceState serviceState; - - private EarlybirdServiceResponse(@Nullable EarlybirdResponse earlybirdResponse, - ServiceState serviceState) { - this.earlybirdResponse = earlybirdResponse; - this.serviceState = serviceState; - if (!serviceState.serviceWasCalled()) { - Preconditions.checkArgument(earlybirdResponse == null); - } - } - - /** - * Creates a new EarlybirdServiceResponse instance, indicating that the service was not called. - * - * @param serviceState The state of the service. - * @return a new EarlybirdServiceResponse instance, indicating that the service was not called. - */ - public static EarlybirdServiceResponse serviceNotCalled(ServiceState serviceState) { - Preconditions.checkArgument(!serviceState.serviceWasCalled()); - return new EarlybirdServiceResponse(null, serviceState); - } - - /** - * Creates a new EarlybirdServiceResponse instance that wraps the given earlybird response. - * - * @param earlybirdResponse The EarlybirdResponse instance returned by the service. - * @return a new EarlybirdServiceResponse instance that wraps the given earlybird response. - */ - public static EarlybirdServiceResponse serviceCalled(EarlybirdResponse earlybirdResponse) { - return new EarlybirdServiceResponse(earlybirdResponse, ServiceState.SERVICE_CALLED); - } - - /** Returns the wrapped earlybird response. */ - @Nullable - public EarlybirdResponse getResponse() { - return earlybirdResponse; - } - - /** Returns the state of the service. */ - public ServiceState getServiceState() { - return serviceState; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/InjectionNames.docx b/src/java/com/twitter/search/earlybird_root/common/InjectionNames.docx new file mode 100644 index 000000000..8549d93f5 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/InjectionNames.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/InjectionNames.java b/src/java/com/twitter/search/earlybird_root/common/InjectionNames.java deleted file mode 100644 index 8662a1d7b..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/InjectionNames.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -public final class InjectionNames { - - public static final String FULL_ARCHIVE = "full_archive"; - public static final String REALTIME = "realtime"; - public static final String PROTECTED = "protected"; - - private InjectionNames() { } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.docx b/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.docx new file mode 100644 index 000000000..549b22c74 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java b/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java deleted file mode 100644 index 0df98b34e..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java +++ /dev/null @@ -1,86 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import java.util.concurrent.TimeUnit; - -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.queryparser.parser.SerializedQueryParser; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.util.Future; - -/** - * Common utils for parsing serialized queries, and handling query parser exceptions. - */ -public final class QueryParsingUtils { - - private static final Logger LOG = LoggerFactory.getLogger(QueryParsingUtils.class); - - @VisibleForTesting - public static final SearchCounter QUERYPARSE_COUNT = - SearchCounter.export("root_queryparse_count"); - private static final SearchTimerStats QUERYPARSE_TIMER = - SearchTimerStats.export("root_queryparse_time", TimeUnit.NANOSECONDS, false, true); - private static final SearchCounter NO_PARSED_QUERY_COUNT = - SearchCounter.export("root_no_parsed_query_count"); - - private QueryParsingUtils() { } - - /** - * Takes an earlybird request, and parses its serialized query (if it is set). - * Expects the required ThriftSearchQuery to be set on the passed in EarlybirdRequest. - * - * @param request the earlybird request to parse. - * @return null if the request does not specify a serialized query. - * @throws QueryParserException if querry parsing fails. - */ - @Nullable - static Query getParsedQuery(EarlybirdRequest request) throws QueryParserException { - // searchQuery is required on EarlybirdRequest. - Preconditions.checkState(request.isSetSearchQuery()); - Query parsedQuery; - if (request.getSearchQuery().isSetSerializedQuery()) { - long startTime = System.nanoTime(); - try { - String serializedQuery = request.getSearchQuery().getSerializedQuery(); - - parsedQuery = new SerializedQueryParser().parse(serializedQuery); - } finally { - QUERYPARSE_COUNT.increment(); - QUERYPARSE_TIMER.timerIncrement(System.nanoTime() - startTime); - } - } else { - NO_PARSED_QUERY_COUNT.increment(); - parsedQuery = null; - } - return parsedQuery; - } - - /** - * Creates a new EarlybirdResponse with a CLIENT_ERROR response code, to be used as a response - * to a request where we failed to parse a user passed in serialized query. - */ - public static Future newClientErrorResponse( - EarlybirdRequest request, - QueryParserException e) { - - String msg = "Failed to parse query"; - LOG.warn(msg, e); - - EarlybirdResponse errorResponse = - new EarlybirdResponse(EarlybirdResponseCode.CLIENT_ERROR, 0); - errorResponse.setDebugString(msg + ": " + e.getMessage()); - return Future.value(errorResponse); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/common/TwitterContextProvider.docx b/src/java/com/twitter/search/earlybird_root/common/TwitterContextProvider.docx new file mode 100644 index 000000000..1adcc3d62 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/common/TwitterContextProvider.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/common/TwitterContextProvider.java b/src/java/com/twitter/search/earlybird_root/common/TwitterContextProvider.java deleted file mode 100644 index 50b40bb41..000000000 --- a/src/java/com/twitter/search/earlybird_root/common/TwitterContextProvider.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.twitter.search.earlybird_root.common; - -import javax.inject.Singleton; - -import scala.Option; - -import com.twitter.context.TwitterContext; -import com.twitter.context.thriftscala.Viewer; -import com.twitter.search.TwitterContextPermit; - -/** - * This class is needed to provide an easy way for unit tests to "inject" - * a TwitterContext Viewer - */ -@Singleton -public class TwitterContextProvider { - public Option get() { - return TwitterContext.acquire(TwitterContextPermit.get()).apply(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/config/BUILD.bazel b/src/java/com/twitter/search/earlybird_root/config/BUILD.bazel deleted file mode 100644 index 48896ca1c..000000000 --- a/src/java/com/twitter/search/earlybird_root/config/BUILD.bazel +++ /dev/null @@ -1,7 +0,0 @@ -java_library( - sources = ["*.java"], - dependencies = [ - "src/java/com/twitter/common/util:system-mocks", - "src/java/com/twitter/search/earlybird/config", - ], -) diff --git a/src/java/com/twitter/search/earlybird_root/config/BUILD.docx b/src/java/com/twitter/search/earlybird_root/config/BUILD.docx new file mode 100644 index 000000000..7386a9948 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/config/BUILD.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/config/RootClusterBoundaryInfo.docx b/src/java/com/twitter/search/earlybird_root/config/RootClusterBoundaryInfo.docx new file mode 100644 index 000000000..a9734d843 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/config/RootClusterBoundaryInfo.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/config/RootClusterBoundaryInfo.java b/src/java/com/twitter/search/earlybird_root/config/RootClusterBoundaryInfo.java deleted file mode 100644 index 00c72d4e5..000000000 --- a/src/java/com/twitter/search/earlybird_root/config/RootClusterBoundaryInfo.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.twitter.search.earlybird_root.config; - -import java.util.Date; - -import com.twitter.common.util.Clock; -import com.twitter.search.earlybird.config.ServingRange; -import com.twitter.search.earlybird.config.TierServingBoundaryEndPoint; - -/** - * Time boundary information for a root cluster. - * Used by EarlybirdTimeRangeFilter. - */ -public class RootClusterBoundaryInfo implements ServingRange { - - private final TierServingBoundaryEndPoint servingRangeSince; - private final TierServingBoundaryEndPoint servingRangeMax; - - /** - * Build a time boundary information - */ - public RootClusterBoundaryInfo( - Date startDate, - Date clusterEndDate, - String sinceIdBoundaryString, - String maxIdBoundaryString, - Clock clock) { - this.servingRangeSince = TierServingBoundaryEndPoint - .newTierServingBoundaryEndPoint(sinceIdBoundaryString, startDate, clock); - this.servingRangeMax = TierServingBoundaryEndPoint - .newTierServingBoundaryEndPoint(maxIdBoundaryString, clusterEndDate, clock); - } - - public long getServingRangeSinceId() { - return servingRangeSince.getBoundaryTweetId(); - } - - public long getServingRangeMaxId() { - return servingRangeMax.getBoundaryTweetId(); - } - - public long getServingRangeSinceTimeSecondsFromEpoch() { - return servingRangeSince.getBoundaryTimeSecondsFromEpoch(); - } - - public long getServingRangeUntilTimeSecondsFromEpoch() { - return servingRangeMax.getBoundaryTimeSecondsFromEpoch(); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/BUILD b/src/java/com/twitter/search/earlybird_root/filters/BUILD deleted file mode 100644 index 464d15a80..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/BUILD +++ /dev/null @@ -1,40 +0,0 @@ -java_library( - sources = ["*.java"], - platform = "java8", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/guava", - "3rdparty/jvm/com/google/inject:guice", - "3rdparty/jvm/commons-io", - "3rdparty/jvm/org/slf4j:slf4j-api", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/antlr/com/twitter/search/queryparser/antlr:queryparser-antlr", - "src/java/com/google/common/util/concurrent", - "src/java/com/twitter/common/collections", - "src/java/com/twitter/common/text/language:locale-util", - "src/java/com/twitter/common/util:system-mocks", - "src/java/com/twitter/common_internal/text/version", - "src/java/com/twitter/search/common/clientstats", - "src/java/com/twitter/search/common/decider", - "src/java/com/twitter/search/common/metrics", - "src/java/com/twitter/search/common/partitioning/snowflakeparser", - "src/java/com/twitter/search/common/root", - "src/java/com/twitter/search/common/schema/earlybird", - "src/java/com/twitter/search/common/util:finagleutil", - "src/java/com/twitter/search/common/util/date", - "src/java/com/twitter/search/common/util/earlybird", - "src/java/com/twitter/search/common/util/io/periodic", - "src/java/com/twitter/search/common/util/lang", - "src/java/com/twitter/search/common/util/thrift:text-protocol", - "src/java/com/twitter/search/earlybird/common", - "src/java/com/twitter/search/earlybird/config", - "src/java/com/twitter/search/earlybird_root/common", - "src/java/com/twitter/search/earlybird_root/quota", - "src/java/com/twitter/search/earlybird_root/validators", - "src/java/com/twitter/search/queryparser", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/search:earlybird-java", - ], -) diff --git a/src/java/com/twitter/search/earlybird_root/filters/BUILD.docx b/src/java/com/twitter/search/earlybird_root/filters/BUILD.docx new file mode 100644 index 000000000..9d0a1f079 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/BUILD.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdArchiveAccessFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/ClientIdArchiveAccessFilter.docx new file mode 100644 index 000000000..fa6a5906a Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/ClientIdArchiveAccessFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdArchiveAccessFilter.java b/src/java/com/twitter/search/earlybird_root/filters/ClientIdArchiveAccessFilter.java deleted file mode 100644 index bb7e7b1ab..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/ClientIdArchiveAccessFilter.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.Optional; - -import javax.inject.Inject; - -import com.google.common.base.Preconditions; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird_root.quota.ClientIdQuotaManager; -import com.twitter.search.earlybird_root.quota.QuotaInfo; -import com.twitter.util.Future; - -public class ClientIdArchiveAccessFilter extends SimpleFilter { - private static final String UNAUTHORIZED_ARCHIVE_ACCESS_COUNTER_PATTERN = - "unauthorized_access_to_full_archive_by_client_%s"; - - private final ClientIdQuotaManager quotaManager; - - /** - * Construct the filter by using ClientIdQuotaManager - */ - @Inject - public ClientIdArchiveAccessFilter(ClientIdQuotaManager quotaManager) { - this.quotaManager = Preconditions.checkNotNull(quotaManager); - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - String clientId = ClientIdUtil.getClientIdFromRequest(request); - - Optional quotaInfoOptional = quotaManager.getQuotaForClient(clientId); - QuotaInfo quotaInfo = quotaInfoOptional.orElseGet(quotaManager::getCommonPoolQuota); - if (!quotaInfo.hasArchiveAccess() && request.isGetOlderResults()) { - SearchCounter unauthorizedArchiveAccessCounter = SearchCounter.export( - String.format(UNAUTHORIZED_ARCHIVE_ACCESS_COUNTER_PATTERN, clientId)); - unauthorizedArchiveAccessCounter.increment(); - - String message = String.format( - "Client %s is not whitelisted for archive access. Request access at go/searchquota.", - clientId); - EarlybirdResponse response = new EarlybirdResponse( - EarlybirdResponseCode.QUOTA_EXCEEDED_ERROR, 0) - .setDebugString(message); - return Future.value(response); - } - return service.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdQueryOperatorStatsFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/ClientIdQueryOperatorStatsFilter.docx new file mode 100644 index 000000000..e57258c5a Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/ClientIdQueryOperatorStatsFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdQueryOperatorStatsFilter.java b/src/java/com/twitter/search/earlybird_root/filters/ClientIdQueryOperatorStatsFilter.java deleted file mode 100644 index 750b39198..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/ClientIdQueryOperatorStatsFilter.java +++ /dev/null @@ -1,129 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.Arrays; -import java.util.EnumSet; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.clientstats.RequestCounters; -import com.twitter.search.common.clientstats.RequestCountersEventListener; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.search.queryparser.query.search.SearchOperator; -import com.twitter.search.queryparser.visitors.DetectVisitor; -import com.twitter.util.Future; - -/** -* This filter exports RequestCounters stats for each unique combination of client_id and -* query_operator. RequestCounters produce 19 stats for each prefix, and we have numerous -* clients and operators, so this filter can produce a large number of stats. To keep the -* number of exported stats reasonable we use an allow list of operators. The list currently -* includes the geo operators while we monitor the impacts of realtime geo filtering. See -* SEARCH-33699 for project details. -* -* To find the stats look for query_client_operator_* exported by archive roots. -* - **/ - -public class ClientIdQueryOperatorStatsFilter - extends SimpleFilter { - - private static final Logger LOG = LoggerFactory.getLogger(ClientIdQueryOperatorStatsFilter.class); - - public static final String COUNTER_PREFIX_PATTERN = "query_client_operator_%s_%s"; - private final Clock clock; - private final ConcurrentMap requestCountersByClientIdAndOperator = - new ConcurrentHashMap<>(); - private final Set operatorsToRecordStatsFor = new HashSet<>(Arrays.asList( - SearchOperator.Type.GEO_BOUNDING_BOX, - SearchOperator.Type.GEOCODE, - SearchOperator.Type.GEOLOCATION_TYPE, - SearchOperator.Type.NEAR, - SearchOperator.Type.PLACE, - SearchOperator.Type.WITHIN)); - - public ClientIdQueryOperatorStatsFilter() { - this.clock = Clock.SYSTEM_CLOCK; - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - EarlybirdRequest req = requestContext.getRequest(); - Query parsedQuery = requestContext.getParsedQuery(); - - if (parsedQuery == null) { - return service.apply(requestContext); - } - - Set operators = getOperators(parsedQuery); - Future response = service.apply(requestContext); - for (SearchOperator.Type operator : operators) { - - RequestCounters clientOperatorCounters = getClientOperatorCounters(req.clientId, operator); - RequestCountersEventListener clientOperatorCountersEventListener = - new RequestCountersEventListener<>( - clientOperatorCounters, clock, EarlybirdSuccessfulResponseHandler.INSTANCE); - - response = response.addEventListener(clientOperatorCountersEventListener); - } - return response; - } - - /** - * Gets or creates RequestCounters for the given clientId and operatorType - */ - private RequestCounters getClientOperatorCounters(String clientId, - SearchOperator.Type operatorType) { - String counterPrefix = String.format(COUNTER_PREFIX_PATTERN, clientId, operatorType.toString()); - RequestCounters clientCounters = requestCountersByClientIdAndOperator.get(counterPrefix); - if (clientCounters == null) { - clientCounters = new RequestCounters(counterPrefix); - RequestCounters existingCounters = - requestCountersByClientIdAndOperator.putIfAbsent(counterPrefix, clientCounters); - if (existingCounters != null) { - clientCounters = existingCounters; - } - } - return clientCounters; - } - - /** - * Returns a set of the SearchOperator types that are: - * 1) used by the query - * 2) included in the allow list: operatorsToRecordStatsFor - */ - private Set getOperators(Query parsedQuery) { - final DetectVisitor detectVisitor = new DetectVisitor(false, SearchOperator.Type.values()); - Set detectedOperatorTypes = EnumSet.noneOf(SearchOperator.Type.class); - - try { - parsedQuery.accept(detectVisitor); - } catch (QueryParserException e) { - LOG.error("Failed to detect SearchOperators in query: " + parsedQuery.toString()); - return detectedOperatorTypes; - } - - for (Query query : detectVisitor.getDetectedQueries()) { - // This detectVisitor only matches on SearchOperators. - SearchOperator operator = (SearchOperator) query; - SearchOperator.Type operatorType = operator.getOperatorType(); - if (operatorsToRecordStatsFor.contains(operatorType)) { - detectedOperatorTypes.add(operatorType); - } - } - return detectedOperatorTypes; - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdQuotaFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/ClientIdQuotaFilter.docx new file mode 100644 index 000000000..8ddace9b3 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/ClientIdQuotaFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdQuotaFilter.java b/src/java/com/twitter/search/earlybird_root/filters/ClientIdQuotaFilter.java deleted file mode 100644 index 828f92ad4..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/ClientIdQuotaFilter.java +++ /dev/null @@ -1,274 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.Optional; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -import javax.inject.Inject; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.util.concurrent.RateLimiterProxy; -import com.google.common.util.concurrent.TwitterRateLimiterProxyFactory; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.metrics.SearchCustomGauge; -import com.twitter.search.common.metrics.SearchLongGauge; -import com.twitter.search.common.metrics.SearchRateCounter; -import com.twitter.search.common.util.FinagleUtil; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; -import com.twitter.search.earlybird_root.quota.ClientIdQuotaManager; -import com.twitter.search.earlybird_root.quota.QuotaInfo; -import com.twitter.util.Future; - -/** - * A filter that tracks and limits the per-client request rate. The ID of the client is determined - * by looking at the Finagle client ID and the EarlybirdRequest.clientId field. - * - * The configuration currently has one config based implementation: see ConfigRepoBasedQuotaManager. - * - * If a client has a quota set, this filter will rate limit the requests from that client based on - * that quota. Otherwise, the client is assumed to use a "common request pool", which has its own - * quota. A quota for the common pool must always exist (even if it's set to 0). - * - * All rate limiters used in this class are tolerant to bursts. See TwitterRateLimiterFactory for - * more details. - * - * If a client sends us more requests than its allowed quota, we keep track of the excess traffic - * and export that number in a counter. However, we rate limit the requests from that client only if - * the QuotaInfo returned from ClientIdQuotaManager has the shouldEnforceQuota property set to true. - * - * If a request is rate limited, the filter will return an EarlybirdResponse with a - * QUOTA_EXCEEDED_ERROR response code. - */ -public class ClientIdQuotaFilter extends SimpleFilter { - private static final class ClientQuota { - private final QuotaInfo quotaInfo; - private final boolean shouldAllowRequest; - private final ClientIdRequestCounters requestCounters; - - private ClientQuota( - QuotaInfo quotaInfo, - boolean shouldAllowRequest, - ClientIdRequestCounters requestCounters) { - - this.quotaInfo = quotaInfo; - this.shouldAllowRequest = shouldAllowRequest; - this.requestCounters = requestCounters; - } - } - - private static final class ClientIdRequestCounters { - private static final String REQUESTS_RECEIVED_COUNTER_NAME_PATTERN = - "quota_requests_received_for_client_id_%s"; - - private static final String THROTTLED_REQUESTS_COUNTER_NAME_PATTERN = - "quota_requests_throttled_for_client_id_%s"; - - private static final String REQUESTS_ABOVE_QUOTA_COUNTER_NAME_PATTERN = - "quota_requests_above_quota_for_client_id_%s"; - - private static final String REQUESTS_WITHIN_QUOTA_COUNTER_NAME_PATTERN = - "quota_requests_within_quota_for_client_id_%s"; - - private static final String PER_CLIENT_QUOTA_GAUGE_NAME_PATTERN = - "quota_for_client_id_%s"; - - private final SearchRateCounter throttledRequestsCounter; - private final SearchRateCounter requestsReceivedCounter; - private final SearchRateCounter requestsAboveQuotaCounter; - private final SearchRateCounter requestsWithinQuotaCounter; - private final SearchLongGauge quotaClientGauge; - - private ClientIdRequestCounters(String clientId) { - this.throttledRequestsCounter = SearchRateCounter.export( - String.format(THROTTLED_REQUESTS_COUNTER_NAME_PATTERN, clientId)); - - this.requestsReceivedCounter = SearchRateCounter.export( - String.format(REQUESTS_RECEIVED_COUNTER_NAME_PATTERN, clientId), true); - - this.quotaClientGauge = SearchLongGauge.export( - String.format(PER_CLIENT_QUOTA_GAUGE_NAME_PATTERN, clientId)); - - this.requestsAboveQuotaCounter = SearchRateCounter.export( - String.format(REQUESTS_ABOVE_QUOTA_COUNTER_NAME_PATTERN, clientId)); - - this.requestsWithinQuotaCounter = SearchRateCounter.export( - String.format(REQUESTS_WITHIN_QUOTA_COUNTER_NAME_PATTERN, clientId)); - } - } - - private static final String REQUESTS_RECEIVED_FOR_EMAIL_COUNTER_NAME_PATTERN = - "quota_requests_received_for_email_%s"; - - // We have this aggregate stat only because doing sumany(...) on the - // per-client statistic is too expensive for an alert. - @VisibleForTesting - static final SearchRateCounter TOTAL_REQUESTS_RECEIVED_COUNTER = - SearchRateCounter.export("total_quota_requests_received", true); - - private static final int DEFAULT_BURST_FACTOR_SECONDS = 60; - private static final String QUOTA_STAT_CACHE_SIZE = "quota_stat_cache_size"; - private static final String MISSING_QUOTA_FOR_CLIENT_ID_COUNTER_NAME_PATTERN = - "quota_requests_with_missing_quota_for_client_id_%s"; - - private static final Logger LOG = LoggerFactory.getLogger(ClientIdQuotaFilter.class); - - private final ConcurrentMap rateLimiterProxiesByClientId = - new ConcurrentHashMap<>(); - - private final ClientIdQuotaManager quotaManager; - private final TwitterRateLimiterProxyFactory rateLimiterProxyFactory; - private final LoadingCache clientRequestCounters; - private final LoadingCache emailRequestCounters; - - /** Creates a new ClientIdQuotaFilter instance. */ - @Inject - public ClientIdQuotaFilter(ClientIdQuotaManager quotaManager, - TwitterRateLimiterProxyFactory rateLimiterProxyFactory) { - this.quotaManager = quotaManager; - this.rateLimiterProxyFactory = rateLimiterProxyFactory; - - this.clientRequestCounters = CacheBuilder.newBuilder() - .build(new CacheLoader() { - @Override - public ClientIdRequestCounters load(String clientId) { - return new ClientIdRequestCounters(clientId); - } - }); - this.emailRequestCounters = CacheBuilder.newBuilder() - .build(new CacheLoader() { - @Override - public SearchRateCounter load(String email) { - return SearchRateCounter.export( - String.format(REQUESTS_RECEIVED_FOR_EMAIL_COUNTER_NAME_PATTERN, email)); - } - }); - - SearchCustomGauge.export(QUOTA_STAT_CACHE_SIZE, () -> clientRequestCounters.size()); - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - String finagleClientId = FinagleUtil.getFinagleClientName(); - String requestClientId = ClientIdUtil.getClientIdFromRequest(request); - LOG.debug(String.format("Client id from request or attribution: %s", requestClientId)); - - // Multiple client ids may be grouped into a single quota client id, all the - // unknown or unset client ids for example. - String quotaClientId = ClientIdUtil.getQuotaClientId(requestClientId); - LOG.debug(String.format("Client id used for checking quota: %s", quotaClientId)); - - ClientQuota clientQuota = getClientQuota(quotaClientId); - if (!clientQuota.shouldAllowRequest && clientQuota.quotaInfo.shouldEnforceQuota()) { - clientQuota.requestCounters.throttledRequestsCounter.increment(); - - return Future.value(getQuotaExceededResponse( - finagleClientId, - clientQuota.quotaInfo.getQuotaClientId(), - clientQuota.quotaInfo.getQuota())); - } - - return service.apply(request); - } - - private ClientQuota getClientQuota(String clientId) { - Optional quotaInfoOptional = quotaManager.getQuotaForClient(clientId); - if (!quotaInfoOptional.isPresent()) { - SearchRateCounter noQuotaFoundForClientCounter = SearchRateCounter.export( - String.format(MISSING_QUOTA_FOR_CLIENT_ID_COUNTER_NAME_PATTERN, clientId)); - noQuotaFoundForClientCounter.increment(); - } - - // If a quota was set for this client, use it. Otherwise, use the common pool's quota. - // A quota for the common pool must always exist. - QuotaInfo quotaInfo = quotaInfoOptional.orElseGet(quotaManager::getCommonPoolQuota); - - ClientIdRequestCounters requestCounters = clientRequestCounters - .getUnchecked(quotaInfo.getQuotaClientId()); - emailRequestCounters.getUnchecked(quotaInfo.getQuotaEmail()).increment(); - - // Increment a stat for each request the filter receives. - requestCounters.requestsReceivedCounter.increment(); - - // Also increment the total stat - TOTAL_REQUESTS_RECEIVED_COUNTER.increment(); - - // If shouldEnforceQuota is false, we already know that the request will be allowed. - // However, we still want to update the rate limiter and the stats. - final boolean requestAllowed; - if (quotaInfo.getQuota() == 0) { - // If the quota for this client is set to 0, then the request should not be allowed. - // - // Do not update the rate limiter's rate: RateLimiter only accepts positive rates, and in any - // case, we already know that the request should not be allowed. - requestAllowed = false; - } else { - // The quota is not 0: update the rate limiter with the new quota, and see if the request - // should be allowed. - RateLimiterProxy rateLimiterProxy = getClientRateLimiterProxy(quotaInfo.getQuotaClientId(), - quotaInfo.getQuota()); - requestAllowed = rateLimiterProxy.tryAcquire(); - } - - // Report the current quota for each client - requestCounters.quotaClientGauge.set(quotaInfo.getQuota()); - - // Update the corresponding counter, if the request should not be allowed. - if (!requestAllowed) { - requestCounters.requestsAboveQuotaCounter.increment(); - } else { - requestCounters.requestsWithinQuotaCounter.increment(); - } - - // Throttle the request only if the quota for this service should be enforced. - return new ClientQuota(quotaInfo, requestAllowed, requestCounters); - } - - private RateLimiterProxy getClientRateLimiterProxy(String clientId, int rate) { - // If a RateLimiter for this client doesn't exist, create one, - // unless another thread beat us to it. - RateLimiterProxy clientRateLimiterProxy = rateLimiterProxiesByClientId.get(clientId); - if (clientRateLimiterProxy == null) { - clientRateLimiterProxy = - rateLimiterProxyFactory.createRateLimiterProxy(rate, DEFAULT_BURST_FACTOR_SECONDS); - RateLimiterProxy existingClientRateLimiterProxy = - rateLimiterProxiesByClientId.putIfAbsent(clientId, clientRateLimiterProxy); - if (existingClientRateLimiterProxy != null) { - clientRateLimiterProxy = existingClientRateLimiterProxy; - } - LOG.info("Using rate limiter with rate {} for clientId {}.", - clientRateLimiterProxy.getRate(), clientId); - } - - // Update the quota, if needed. - if (clientRateLimiterProxy.getRate() != rate) { - LOG.info("Updating the rate from {} to {} for clientId {}.", - clientRateLimiterProxy.getRate(), rate, clientId); - clientRateLimiterProxy.setRate(rate); - } - - return clientRateLimiterProxy; - } - - private static EarlybirdResponse getQuotaExceededResponse( - String finagleClientId, String quotaClientId, int quota) { - return new EarlybirdResponse(EarlybirdResponseCode.QUOTA_EXCEEDED_ERROR, 0) - .setSearchResults(new ThriftSearchResults()) - .setDebugString(String.format( - "Client %s (finagle client ID %s) has exceeded its request quota of %d. " - + "Please request more quota at go/searchquota.", - quotaClientId, finagleClientId, quota)); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdTrackingFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/ClientIdTrackingFilter.docx new file mode 100644 index 000000000..1d92b1286 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/ClientIdTrackingFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientIdTrackingFilter.java b/src/java/com/twitter/search/earlybird_root/filters/ClientIdTrackingFilter.java deleted file mode 100644 index 7da53ae4c..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/ClientIdTrackingFilter.java +++ /dev/null @@ -1,148 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -import javax.inject.Inject; - -import com.google.common.annotations.VisibleForTesting; - -import com.twitter.common.collections.Pair; -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.clientstats.RequestCounters; -import com.twitter.search.common.clientstats.RequestCountersEventListener; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.util.FinagleUtil; -import com.twitter.search.common.util.earlybird.ThriftSearchQueryUtil; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.util.Future; - -/** Tracks the number of queries we get from each client. */ -public class ClientIdTrackingFilter extends SimpleFilter { - // Be careful when changing the names of these stats or adding new ones: make sure that they have - // prefixes/suffixes that will allow us to group them in Viz, without pulling in other stats. - // For example, we'll probably have a Viz graph for client_id_tracker_qps_for_client_id_*_all. - // So if you add a new stat named client_id_tracker_qps_for_client_id_%s_and_new_field_%s_all, - // then the graph will be grouping up the values from both stats, instead of grouping up the - // values only for client_id_tracker_qps_for_client_id_%s_all. - @VisibleForTesting - static final String QPS_ALL_STAT_PATTERN = "client_id_tracker_qps_for_%s_all"; - - @VisibleForTesting - static final String QPS_LOGGED_IN_STAT_PATTERN = "client_id_tracker_qps_for_%s_logged_in"; - - @VisibleForTesting - static final String QPS_LOGGED_OUT_STAT_PATTERN = "client_id_tracker_qps_for_%s_logged_out"; - - static final String SUPERROOT_REJECT_REQUESTS_WITH_UNKNOWN_FINAGLE_ID = - "superroot_reject_requests_with_unknown_finagle_id"; - - static final String UNKNOWN_FINAGLE_ID_DEBUG_STRING = "Please specify a finagle client id."; - - private final ConcurrentMap requestCountersByClientId = - new ConcurrentHashMap<>(); - private final ConcurrentMap, RequestCounters> - requestCountersByFinagleIdAndClientId = new ConcurrentHashMap<>(); - private final Clock clock; - private final SearchDecider decider; - - @Inject - public ClientIdTrackingFilter(SearchDecider decider) { - this(decider, Clock.SYSTEM_CLOCK); - } - - @VisibleForTesting - ClientIdTrackingFilter(SearchDecider decider, Clock clock) { - this.decider = decider; - this.clock = clock; - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - String clientId = ClientIdUtil.getClientIdFromRequest(request); - String finagleId = FinagleUtil.getFinagleClientName(); - boolean isLoggedIn = ThriftSearchQueryUtil.requestInitiatedByLoggedInUser(request); - incrementCounters(clientId, finagleId, isLoggedIn); - - if (decider.isAvailable(SUPERROOT_REJECT_REQUESTS_WITH_UNKNOWN_FINAGLE_ID) - && finagleId.equals(FinagleUtil.UNKNOWN_CLIENT_NAME)) { - EarlybirdResponse response = new EarlybirdResponse( - EarlybirdResponseCode.QUOTA_EXCEEDED_ERROR, 0) - .setDebugString(UNKNOWN_FINAGLE_ID_DEBUG_STRING); - return Future.value(response); - } - - RequestCounters clientCounters = getClientCounters(clientId); - RequestCountersEventListener clientCountersEventListener = - new RequestCountersEventListener<>( - clientCounters, clock, EarlybirdSuccessfulResponseHandler.INSTANCE); - RequestCounters finagleIdAndClientCounters = getFinagleIdClientCounters(clientId, finagleId); - RequestCountersEventListener finagleIdAndClientCountersEventListener = - new RequestCountersEventListener<>( - finagleIdAndClientCounters, clock, EarlybirdSuccessfulResponseHandler.INSTANCE); - - return service.apply(request) - .addEventListener(clientCountersEventListener) - .addEventListener(finagleIdAndClientCountersEventListener); - } - - // Returns the RequestCounters instance tracking the requests from the given client ID. - private RequestCounters getClientCounters(String clientId) { - RequestCounters clientCounters = requestCountersByClientId.get(clientId); - if (clientCounters == null) { - clientCounters = new RequestCounters(ClientIdUtil.formatClientId(clientId)); - RequestCounters existingCounters = - requestCountersByClientId.putIfAbsent(clientId, clientCounters); - if (existingCounters != null) { - clientCounters = existingCounters; - } - } - return clientCounters; - } - - // Returns the RequestCounters instance tracking the requests from the given client ID. - private RequestCounters getFinagleIdClientCounters(String clientId, String finagleId) { - Pair clientKey = Pair.of(clientId, finagleId); - RequestCounters counters = requestCountersByFinagleIdAndClientId.get(clientKey); - if (counters == null) { - counters = new RequestCounters(ClientIdUtil.formatFinagleClientIdAndClientId( - finagleId, clientId)); - RequestCounters existingCounters = requestCountersByFinagleIdAndClientId.putIfAbsent( - clientKey, counters); - if (existingCounters != null) { - counters = existingCounters; - } - } - return counters; - } - - // Increments the correct counters, based on the given clientId, finagleId, and whether or not the - // request came from a logged in user. - private static void incrementCounters(String clientId, String finagleId, boolean isLoggedIn) { - String clientIdForStats = ClientIdUtil.formatClientId(clientId); - String finagleClientIdAndClientIdForStats = - ClientIdUtil.formatFinagleClientIdAndClientId(finagleId, clientId); - SearchCounter.export(String.format(QPS_ALL_STAT_PATTERN, clientIdForStats)).increment(); - SearchCounter.export(String.format(QPS_ALL_STAT_PATTERN, finagleClientIdAndClientIdForStats)) - .increment(); - if (isLoggedIn) { - SearchCounter.export(String.format(QPS_LOGGED_IN_STAT_PATTERN, clientIdForStats)).increment(); - SearchCounter.export( - String.format(QPS_LOGGED_IN_STAT_PATTERN, finagleClientIdAndClientIdForStats)) - .increment(); - } else { - SearchCounter.export(String.format(QPS_LOGGED_OUT_STAT_PATTERN, clientIdForStats)) - .increment(); - SearchCounter.export( - String.format(QPS_LOGGED_OUT_STAT_PATTERN, finagleClientIdAndClientIdForStats)) - .increment(); - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientRequestTimeFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/ClientRequestTimeFilter.docx new file mode 100644 index 000000000..f126f6eaf Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/ClientRequestTimeFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/ClientRequestTimeFilter.java b/src/java/com/twitter/search/earlybird_root/filters/ClientRequestTimeFilter.java deleted file mode 100644 index b19da2819..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/ClientRequestTimeFilter.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import javax.inject.Inject; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.util.Future; - -/** A filter that sets the EarlybirdRequest.clientRequestTimeMs field if it's not already set. */ -public class ClientRequestTimeFilter extends SimpleFilter { - private static final SearchCounter CLIENT_REQUEST_TIME_MS_UNSET_COUNTER = - SearchCounter.export("client_request_time_ms_unset"); - - private final Clock clock; - - @Inject - public ClientRequestTimeFilter(Clock clock) { - this.clock = clock; - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - if (!request.isSetClientRequestTimeMs()) { - CLIENT_REQUEST_TIME_MS_UNSET_COUNTER.increment(); - request.setClientRequestTimeMs(clock.nowMillis()); - } - return service.apply(request); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/DeadlineTimeoutStatsFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/DeadlineTimeoutStatsFilter.docx new file mode 100644 index 000000000..24428215d Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/DeadlineTimeoutStatsFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/DeadlineTimeoutStatsFilter.java b/src/java/com/twitter/search/earlybird_root/filters/DeadlineTimeoutStatsFilter.java deleted file mode 100644 index 0ac1a2113..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/DeadlineTimeoutStatsFilter.java +++ /dev/null @@ -1,188 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.concurrent.TimeUnit; -import javax.inject.Inject; - -import scala.Option; - -import com.google.common.base.Preconditions; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; - -import com.twitter.common.util.Clock; -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.finagle.context.Contexts$; -import com.twitter.finagle.context.Deadline; -import com.twitter.finagle.context.Deadline$; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.metrics.SearchTimerStats; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.util.Future; - -/** - * A filter for comparing the request deadline (set in the finagle request context) with the request - * timeout, as set in the EarlybirdRequest. - * - * Tracks stats per client, for (1) requests where the request deadline is set to expire before the - * EarlybirdRequest timeout, and also (2) requests where the deadline allows enough time for the - * EarlybirdRequest timeout to kick in. - */ -public class DeadlineTimeoutStatsFilter - extends SimpleFilter { - - // All stats maps below are per client id, keyed by the client id. - private final LoadingCache requestTimeoutNotSetStats; - private final LoadingCache finagleDeadlineNotSetStats; - private final LoadingCache finagleDeadlineAndRequestTimeoutNotSetStats; - private final LoadingCache requestTimeoutStats; - private final LoadingCache finagleDeadlineStats; - private final LoadingCache deadlineLargerStats; - private final LoadingCache deadlineSmallerStats; - - @Inject - public DeadlineTimeoutStatsFilter(Clock clock) { - this.requestTimeoutNotSetStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchCounter load(String clientId) { - return SearchCounter.export( - "deadline_for_client_id_" + clientId + "_request_timeout_not_set"); - } - }); - this.finagleDeadlineNotSetStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchCounter load(String clientId) { - return SearchCounter.export( - "deadline_for_client_id_" + clientId + "_finagle_deadline_not_set"); - } - }); - this.finagleDeadlineAndRequestTimeoutNotSetStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchCounter load(String clientId) { - return SearchCounter.export( - "deadline_for_client_id_" + clientId - + "_finagle_deadline_and_request_timeout_not_set"); - } - }); - this.requestTimeoutStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchTimerStats load(String clientId) { - return SearchTimerStats.export( - "deadline_for_client_id_" + clientId + "_request_timeout", - TimeUnit.MILLISECONDS, - false, - true, - clock); - } - }); - this.finagleDeadlineStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchTimerStats load(String clientId) { - return SearchTimerStats.export( - "deadline_for_client_id_" + clientId + "_finagle_deadline", - TimeUnit.MILLISECONDS, - false, - true, - clock); - } - }); - this.deadlineLargerStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchTimerStats load(String clientId) { - return SearchTimerStats.export( - "deadline_for_client_id_" + clientId - + "_finagle_deadline_larger_than_request_timeout", - TimeUnit.MILLISECONDS, - false, - true, - clock - ); - } - }); - this.deadlineSmallerStats = CacheBuilder.newBuilder().build( - new CacheLoader() { - public SearchTimerStats load(String clientId) { - return SearchTimerStats.export( - "deadline_for_client_id_" + clientId - + "_finagle_deadline_smaller_than_request_timeout", - TimeUnit.MILLISECONDS, - false, - true, - clock - ); - } - }); - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - - EarlybirdRequest request = requestContext.getRequest(); - String clientId = ClientIdUtil.getClientIdFromRequest(request); - long requestTimeoutMillis = getRequestTimeout(request); - Option deadline = Contexts$.MODULE$.broadcast().get(Deadline$.MODULE$); - - // Tracking per-client timeouts specified in the EarlybirdRequest. - if (requestTimeoutMillis > 0) { - requestTimeoutStats.getUnchecked(clientId).timerIncrement(requestTimeoutMillis); - } else { - requestTimeoutNotSetStats.getUnchecked(clientId).increment(); - } - - // How much time does this request have, from its deadline start, to the effective deadline. - if (deadline.isDefined()) { - long deadlineEndTimeMillis = deadline.get().deadline().inMillis(); - long deadlineStartTimeMillis = deadline.get().timestamp().inMillis(); - long finagleDeadlineTimeMillis = deadlineEndTimeMillis - deadlineStartTimeMillis; - finagleDeadlineStats.getUnchecked(clientId).timerIncrement(finagleDeadlineTimeMillis); - } else { - finagleDeadlineNotSetStats.getUnchecked(clientId).increment(); - } - - // Explicitly track when both are not set. - if (requestTimeoutMillis <= 0 && deadline.isEmpty()) { - finagleDeadlineAndRequestTimeoutNotSetStats.getUnchecked(clientId).increment(); - } - - // If both timeout and the deadline are set, track how much over / under we are, when - // comparing the deadline, and the EarlybirdRequest timeout. - if (requestTimeoutMillis > 0 && deadline.isDefined()) { - long deadlineEndTimeMillis = deadline.get().deadline().inMillis(); - Preconditions.checkState(request.isSetClientRequestTimeMs(), - "Expect ClientRequestTimeFilter to always set the clientRequestTimeMs field. Request: %s", - request); - long requestStartTimeMillis = request.getClientRequestTimeMs(); - long requestEndTimeMillis = requestStartTimeMillis + requestTimeoutMillis; - - long deadlineDiffMillis = deadlineEndTimeMillis - requestEndTimeMillis; - if (deadlineDiffMillis >= 0) { - deadlineLargerStats.getUnchecked(clientId).timerIncrement(deadlineDiffMillis); - } else { - // Track "deadline is smaller" as positive values. - deadlineSmallerStats.getUnchecked(clientId).timerIncrement(-deadlineDiffMillis); - } - } - - return service.apply(requestContext); - } - - private long getRequestTimeout(EarlybirdRequest request) { - if (request.isSetSearchQuery() - && request.getSearchQuery().isSetCollectorParams() - && request.getSearchQuery().getCollectorParams().isSetTerminationParams() - && request.getSearchQuery().getCollectorParams().getTerminationParams().isSetTimeoutMs()) { - - return request.getSearchQuery().getCollectorParams().getTerminationParams().getTimeoutMs(); - } else if (request.isSetTimeoutMs()) { - return request.getTimeoutMs(); - } else { - return -1; - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/DisableClientByTierFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/DisableClientByTierFilter.docx new file mode 100644 index 000000000..f8953cdec Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/DisableClientByTierFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/DisableClientByTierFilter.java b/src/java/com/twitter/search/earlybird_root/filters/DisableClientByTierFilter.java deleted file mode 100644 index 299d89d0f..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/DisableClientByTierFilter.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.Optional; - -import javax.inject.Inject; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.earlybird.common.ClientIdUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.ThriftSearchResult; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; -import com.twitter.search.earlybird_root.quota.ClientIdQuotaManager; -import com.twitter.search.earlybird_root.quota.QuotaInfo; -import com.twitter.util.Future; - -public class DisableClientByTierFilter extends SimpleFilter { - private static final String CLIENT_BLOCKED_RESPONSE_PATTERN = - "Requests of client %s are blocked due to %s disable"; - - private final SearchDecider decider; - private final ClientIdQuotaManager quotaManager; - - /** - * Construct the filter by using ClientIdQuotaManager - */ - @Inject - public DisableClientByTierFilter(ClientIdQuotaManager quotaManager, SearchDecider decider) { - this.quotaManager = Preconditions.checkNotNull(quotaManager); - this.decider = decider; - } - - @Override - public Future apply(EarlybirdRequest request, - Service service) { - String clientId = ClientIdUtil.getClientIdFromRequest(request); - Optional quotaInfoOptional = quotaManager.getQuotaForClient(clientId); - QuotaInfo quotaInfo = quotaInfoOptional.orElseGet(quotaManager::getCommonPoolQuota); - // Tier value should exist: if client's tier value not in config file, it will be - // set to "no_tier" by default in ConfigBasedQuotaConfig - String tier = quotaInfo.getClientTier(); - - Preconditions.checkNotNull(tier); - - if (decider.isAvailable("superroot_unavailable_for_" + tier + "_clients")) { - return Future.value(getClientBlockedResponse(clientId, tier)); - } else { - return service.apply(request); - } - } - - private static EarlybirdResponse getClientBlockedResponse(String clientId, String tier) { - return new EarlybirdResponse(EarlybirdResponseCode.CLIENT_BLOCKED_BY_TIER_ERROR, 0) - .setSearchResults(new ThriftSearchResults() - .setResults(Lists.newArrayList())) - .setDebugString(String.format(CLIENT_BLOCKED_RESPONSE_PATTERN, clientId, tier)); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/DropAllProtectedOperatorFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/DropAllProtectedOperatorFilter.docx new file mode 100644 index 000000000..f4ea7e849 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/DropAllProtectedOperatorFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/DropAllProtectedOperatorFilter.java b/src/java/com/twitter/search/earlybird_root/filters/DropAllProtectedOperatorFilter.java deleted file mode 100644 index f7703b58c..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/DropAllProtectedOperatorFilter.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import javax.inject.Inject; - -import com.google.common.annotations.VisibleForTesting; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.search.queryparser.visitors.DropAllProtectedOperatorVisitor; -import com.twitter.util.Future; - -public class DropAllProtectedOperatorFilter - extends SimpleFilter { - private static final Logger LOG = - LoggerFactory.getLogger(DropAllProtectedOperatorFilter.class); - private static final SearchCounter QUERY_PARSER_FAILURE_COUNTER = - SearchCounter.export("protected_operator_filter_query_parser_failure_count"); - @VisibleForTesting - static final SearchCounter TOTAL_REQUESTS_COUNTER = - SearchCounter.export("drop_all_protected_operator_filter_total"); - @VisibleForTesting - static final SearchCounter OPERATOR_DROPPED_REQUESTS_COUNTER = - SearchCounter.export("drop_all_protected_operator_filter_operator_dropped"); - - private final DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor; - - @Inject - public DropAllProtectedOperatorFilter( - DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor - ) { - this.dropProtectedOperatorVisitor = dropProtectedOperatorVisitor; - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - TOTAL_REQUESTS_COUNTER.increment(); - Query query = requestContext.getParsedQuery(); - if (query == null) { - return service.apply(requestContext); - } - - Query processedQuery = query; - try { - processedQuery = query.accept(dropProtectedOperatorVisitor); - } catch (QueryParserException e) { - // this should not happen since we already have a parsed query - QUERY_PARSER_FAILURE_COUNTER.increment(); - LOG.warn( - "Failed to drop protected operator for serialized query: " + query.serialize(), e); - } - - if (processedQuery == query) { - return service.apply(requestContext); - } else { - OPERATOR_DROPPED_REQUESTS_COUNTER.increment(); - EarlybirdRequestContext clonedRequestContext = - EarlybirdRequestContext.copyRequestContext(requestContext, processedQuery); - return service.apply(clonedRequestContext); - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdClusterAvailableFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdClusterAvailableFilter.docx new file mode 100644 index 000000000..c72639656 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdClusterAvailableFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdClusterAvailableFilter.java b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdClusterAvailableFilter.java deleted file mode 100644 index 87b304a48..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdClusterAvailableFilter.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.Collections; -import java.util.Map; - -import javax.inject.Inject; - -import com.google.common.collect.Maps; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.schema.earlybird.EarlybirdCluster; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; -import com.twitter.util.Future; - -/** - * A Finagle filter that determines if a certain cluster is available to the SuperRoot. - * - * Normally, all clusters should be available. However, if there's a problem with our systems, and - * our search clusters are causing issues for other services (time outs, for example), then we might - * want to be disable them, and return errors to our clients. - */ -public class EarlybirdClusterAvailableFilter - extends SimpleFilter { - private final SearchDecider decider; - private final EarlybirdCluster cluster; - private final String allRequestsDeciderKey; - private final Map requestTypeDeciderKeys; - private final Map disabledRequests; - - /** - * Creates a new EarlybirdClusterAvailableFilter instance. - * - * @param decider The decider to use to determine if this cluster is available. - * @param cluster The cluster. - */ - @Inject - public EarlybirdClusterAvailableFilter(SearchDecider decider, EarlybirdCluster cluster) { - this.decider = decider; - this.cluster = cluster; - - String clusterName = cluster.getNameForStats(); - this.allRequestsDeciderKey = "superroot_" + clusterName + "_cluster_available_for_all_requests"; - - Map tempDeciderKeys = Maps.newEnumMap(EarlybirdRequestType.class); - Map tempCounters = - Maps.newEnumMap(EarlybirdRequestType.class); - for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) { - String requestTypeName = requestType.getNormalizedName(); - tempDeciderKeys.put(requestType, "superroot_" + clusterName + "_cluster_available_for_" - + requestTypeName + "_requests"); - tempCounters.put(requestType, SearchCounter.export( - "cluster_available_filter_" + clusterName + "_" - + requestTypeName + "_disabled_requests")); - } - requestTypeDeciderKeys = Collections.unmodifiableMap(tempDeciderKeys); - disabledRequests = Collections.unmodifiableMap(tempCounters); - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType(); - if (!decider.isAvailable(allRequestsDeciderKey) - || !decider.isAvailable(requestTypeDeciderKeys.get(requestType))) { - disabledRequests.get(requestType).increment(); - return Future.value( - errorResponse("The " + cluster.getNameForStats() + " cluster is not available for " - + requestType.getNormalizedName() + " requests.")); - } - - return service.apply(requestContext); - } - - private EarlybirdResponse errorResponse(String debugMessage) { - return new EarlybirdResponse(EarlybirdResponseCode.PERSISTENT_ERROR, 0) - .setDebugString(debugMessage); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdFeatureSchemaAnnotateFilter.docx b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdFeatureSchemaAnnotateFilter.docx new file mode 100644 index 000000000..7055d1e74 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdFeatureSchemaAnnotateFilter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdFeatureSchemaAnnotateFilter.java b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdFeatureSchemaAnnotateFilter.java deleted file mode 100644 index f1034a514..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdFeatureSchemaAnnotateFilter.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.List; -import javax.inject.Inject; - -import com.twitter.finagle.Service; -import com.twitter.finagle.SimpleFilter; -import com.twitter.search.common.features.thrift.ThriftSearchFeatureSchemaSpecifier; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.util.Future; - -public class EarlybirdFeatureSchemaAnnotateFilter - extends SimpleFilter { - - private final EarlybirdFeatureSchemaMerger schemaMerger; - - @Inject - public EarlybirdFeatureSchemaAnnotateFilter(EarlybirdFeatureSchemaMerger merger) { - this.schemaMerger = merger; - } - - @Override - public Future apply( - EarlybirdRequestContext requestContext, - Service service) { - return service.apply(annotateRequestContext(requestContext)); - } - - /** - * Annotate the request to indicate the available features schemas before sending to earlybird. - * - * @param requestContext the earlybird request context - */ - private EarlybirdRequestContext annotateRequestContext(EarlybirdRequestContext requestContext) { - EarlybirdRequest request = requestContext.getRequest(); - if (request.isSetSearchQuery() - && request.getSearchQuery().isSetResultMetadataOptions() - && request.getSearchQuery().getResultMetadataOptions().isReturnSearchResultFeatures()) { - // Remember the available client side cached features schema in the context and prepare to - // reset it something new. - List featureSchemasAvailableInClient = - request.getSearchQuery().getResultMetadataOptions().getFeatureSchemasAvailableInClient(); - - return EarlybirdRequestContext.newContext( - request, - requestContext, - schemaMerger.getAvailableSchemaList(), // Set the available feature schemas based on - // what is cached in the current root. - featureSchemasAvailableInClient); - } else { - return requestContext; - } - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdResponseExceptionHandler.docx b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdResponseExceptionHandler.docx new file mode 100644 index 000000000..7cf4bbb67 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdResponseExceptionHandler.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdResponseExceptionHandler.java b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdResponseExceptionHandler.java deleted file mode 100644 index a22d18f9f..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdResponseExceptionHandler.java +++ /dev/null @@ -1,108 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.HashMap; -import java.util.Map; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.common.util.FinagleUtil; -import com.twitter.search.earlybird.thrift.EarlybirdRequest; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird_root.common.ClientErrorException; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; -import com.twitter.util.Function; -import com.twitter.util.Future; - -/** Converts exceptions into EarlybirdResponses with error codes. */ -public class EarlybirdResponseExceptionHandler { - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdResponseExceptionHandler.class); - - private final Map requestTypeToCancelledExceptions - = new HashMap<>(); - private final Map requestTypeToTimeoutExceptions - = new HashMap<>(); - private final Map requestTypeToPersistentErrors - = new HashMap<>(); - private final SearchCounter cancelledExceptions; - private final SearchCounter timeoutExceptions; - private final SearchCounter persistentErrors; - - /** - * Creates a new top level filter for handling exceptions. - */ - public EarlybirdResponseExceptionHandler(String statPrefix) { - this.cancelledExceptions = SearchCounter.export( - statPrefix + "_exception_handler_cancelled_exceptions"); - this.timeoutExceptions = SearchCounter.export( - statPrefix + "_exception_handler_timeout_exceptions"); - this.persistentErrors = SearchCounter.export( - statPrefix + "_exception_handler_persistent_errors"); - - for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) { - String requestTypeNormalized = requestType.getNormalizedName(); - requestTypeToCancelledExceptions.put(requestType, - SearchCounter.export( - statPrefix + "_exception_handler_cancelled_exceptions_" - + requestTypeNormalized)); - requestTypeToTimeoutExceptions.put(requestType, - SearchCounter.export( - statPrefix + "_exception_handler_timeout_exceptions_" - + requestTypeNormalized)); - requestTypeToPersistentErrors.put(requestType, - SearchCounter.export( - statPrefix + "_exception_handler_persistent_errors_" - + requestTypeNormalized)); - } - } - - /** - * If {@code responseFuture} is wraps an exception, converts it to an EarlybirdResponse instance - * with an appropriate error code. - * - * @param request The earlybird request. - * @param responseFuture The response future. - */ - public Future handleException(final EarlybirdRequest request, - Future responseFuture) { - return responseFuture.handle( - new Function() { - @Override - public EarlybirdResponse apply(Throwable t) { - if (t instanceof ClientErrorException) { - ClientErrorException clientExc = (ClientErrorException) t; - return new EarlybirdResponse() - .setResponseCode(EarlybirdResponseCode.CLIENT_ERROR) - .setDebugString(clientExc.getMessage()); - } else if (FinagleUtil.isCancelException(t)) { - requestTypeToCancelledExceptions.get(EarlybirdRequestType.of(request)) - .increment(); - cancelledExceptions.increment(); - return new EarlybirdResponse() - .setResponseCode(EarlybirdResponseCode.CLIENT_CANCEL_ERROR) - .setDebugString(t.getMessage()); - } else if (FinagleUtil.isTimeoutException(t)) { - requestTypeToTimeoutExceptions.get(EarlybirdRequestType.of(request)) - .increment(); - timeoutExceptions.increment(); - return new EarlybirdResponse() - .setResponseCode(EarlybirdResponseCode.SERVER_TIMEOUT_ERROR) - .setDebugString(t.getMessage()); - } else { - // Unexpected exception: log it. - LOG.error("Caught unexpected exception.", t); - - requestTypeToPersistentErrors.get(EarlybirdRequestType.of(request)) - .increment(); - persistentErrors.increment(); - return new EarlybirdResponse() - .setResponseCode(EarlybirdResponseCode.PERSISTENT_ERROR) - .setDebugString(t.getMessage()); - } - } - }); - } -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdSuccessfulResponseHandler.docx b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdSuccessfulResponseHandler.docx new file mode 100644 index 000000000..b32f7f2c0 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdSuccessfulResponseHandler.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdSuccessfulResponseHandler.java b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdSuccessfulResponseHandler.java deleted file mode 100644 index 8c05d6609..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdSuccessfulResponseHandler.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import com.twitter.search.common.clientstats.RequestCounters; -import com.twitter.search.common.clientstats.RequestCountersEventListener; -import com.twitter.search.earlybird.thrift.EarlybirdResponse; -import com.twitter.search.earlybird.thrift.EarlybirdResponseCode; -import com.twitter.search.earlybird.thrift.ThriftSearchResults; -import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults; - -import static com.twitter.search.common.util.earlybird.EarlybirdResponseUtil - .responseConsideredFailed; - - -/** - * Checks EarlybirdResponse's response to update stats. - */ -public final class EarlybirdSuccessfulResponseHandler - implements RequestCountersEventListener.SuccessfulResponseHandler { - - public static final EarlybirdSuccessfulResponseHandler INSTANCE = - new EarlybirdSuccessfulResponseHandler(); - - private EarlybirdSuccessfulResponseHandler() { } - - @Override - public void handleSuccessfulResponse( - EarlybirdResponse response, - RequestCounters requestCounters) { - - if (response == null) { - requestCounters.incrementRequestFailedCounter(); - return; - } - - if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) { - requestCounters.incrementRequestCancelCounter(); - } else if (response.getResponseCode() == EarlybirdResponseCode.SERVER_TIMEOUT_ERROR) { - requestCounters.incrementRequestTimedOutCounter(); - } else if (responseConsideredFailed(response.getResponseCode())) { - requestCounters.incrementRequestFailedCounter(); - } - - ThriftSearchResults results = response.getSearchResults(); - if (results != null) { - requestCounters.incrementResultCounter(results.getResultsSize()); - } - - ThriftTermStatisticsResults termStats = response.getTermStatisticsResults(); - if (termStats != null) { - requestCounters.incrementResultCounter(termStats.getTermResultsSize()); - } - } - -} diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeFilterQueryRewriter.docx b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeFilterQueryRewriter.docx new file mode 100644 index 000000000..38074aaa1 Binary files /dev/null and b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeFilterQueryRewriter.docx differ diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeFilterQueryRewriter.java b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeFilterQueryRewriter.java deleted file mode 100644 index 16b1f60f5..000000000 --- a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeFilterQueryRewriter.java +++ /dev/null @@ -1,133 +0,0 @@ -package com.twitter.search.earlybird_root.filters; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.twitter.search.common.decider.SearchDecider; -import com.twitter.search.common.metrics.SearchCounter; -import com.twitter.search.earlybird.config.ServingRange; -import com.twitter.search.earlybird_root.common.EarlybirdRequestContext; -import com.twitter.search.earlybird_root.common.EarlybirdRequestType; -import com.twitter.search.queryparser.query.Conjunction; -import com.twitter.search.queryparser.query.Query; -import com.twitter.search.queryparser.query.QueryParserException; -import com.twitter.search.queryparser.query.search.SearchOperator; - -/** - * Adds query filters that filter out tweets outside a tier's serving range. Two tiers might load - * the same timeslice, so if the filtering is not done, the two tiers might return duplicates. The - * mergers should know how to handle the duplicates, but this might decrease the number or the - * quality of the returned results. - */ -public class EarlybirdTimeFilterQueryRewriter { - private static final Logger LOG = - LoggerFactory.getLogger(EarlybirdTimeFilterQueryRewriter.class); - - private static final Map NO_QUERY_COUNTS; - static { - final Map tempMap = - Maps.newEnumMap(EarlybirdRequestType.class); - for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) { - tempMap.put(requestType, SearchCounter.export( - "time_filter_query_rewriter_" + requestType.getNormalizedName() + "_no_query_count")); - } - NO_QUERY_COUNTS = Collections.unmodifiableMap(tempMap); - } - - @VisibleForTesting - static final Map ADD_SINCE_ID_MAX_ID_DECIDER_KEY_MAP; - static { - final String ADD_SINCE_ID_MAX_ID_DECIDER_KEY_TEMPLATE = - "add_since_id_max_id_operators_to_%s_query"; - final Map tempMap = Maps.newEnumMap(EarlybirdRequestType.class); - for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) { - tempMap.put( - requestType, - String.format(ADD_SINCE_ID_MAX_ID_DECIDER_KEY_TEMPLATE, requestType.getNormalizedName())); - } - ADD_SINCE_ID_MAX_ID_DECIDER_KEY_MAP = Collections.unmodifiableMap(tempMap); - } - - @VisibleForTesting - static final String ADD_SINCE_ID_MAX_ID_TO_NULL_SERIALIZED_QUERIES_DECIDER_KEY = - "add_since_id_max_id_operators_to_null_serialized_queries"; - - private final SearchDecider decider; - private final ServingRangeProvider servingRangeProvider; - - EarlybirdTimeFilterQueryRewriter( - ServingRangeProvider servingRangeProvider, - SearchDecider decider) { - - this.servingRangeProvider = servingRangeProvider; - this.decider = decider; - } - - /** - * Add maxId and sinceId fields to the serialized query. - * - * This must be done after calculating the IdTimeRanges to prevent interfering with calculating - * IdTimeRanges - */ - public EarlybirdRequestContext rewriteRequest(EarlybirdRequestContext requestContext) - throws QueryParserException { - Query q = requestContext.getParsedQuery(); - if (q == null) { - if (requestContext.getEarlybirdRequestType() != EarlybirdRequestType.TERM_STATS) { - LOG.warn("Received request without a parsed query: " + requestContext.getRequest()); - NO_QUERY_COUNTS.get(requestContext.getEarlybirdRequestType()).increment(); - } - - if (!decider.isAvailable(ADD_SINCE_ID_MAX_ID_TO_NULL_SERIALIZED_QUERIES_DECIDER_KEY)) { - return requestContext; - } - } - - return addOperators(requestContext, q); - } - - private EarlybirdRequestContext addOperators( - EarlybirdRequestContext requestContext, - @Nullable Query query) throws QueryParserException { - - // Add the SINCE_ID and MAX_ID operators only if the decider is enabled. - if (!decider.isAvailable( - ADD_SINCE_ID_MAX_ID_DECIDER_KEY_MAP.get(requestContext.getEarlybirdRequestType()))) { - return requestContext; - } - - // Note: can't recompute the search operators because the serving range changes in real time - // for the most recent tier. - ServingRange servingRange = servingRangeProvider.getServingRange( - requestContext, requestContext.useOverrideTierConfig()); - - long tierSinceId = servingRange.getServingRangeSinceId(); - SearchOperator sinceId = new SearchOperator(SearchOperator.Type.SINCE_ID, - Long.toString(tierSinceId)); - - long tierMaxId = servingRange.getServingRangeMaxId(); - SearchOperator maxId = new SearchOperator(SearchOperator.Type.MAX_ID, - Long.toString(tierMaxId)); - - List conjunctionChildren = (query == null) - ? Lists.newArrayList(sinceId, maxId) - : Lists.newArrayList(query, sinceId, maxId); - - Query restrictedQuery = new Conjunction(conjunctionChildren).simplify(); - - EarlybirdRequestContext copiedRequestContext = - EarlybirdRequestContext.copyRequestContext(requestContext, restrictedQuery); - - return copiedRequestContext; - } -}