the-algorithm/src/java/com/twitter/search/common/schema/SearchWhitespaceAnalyzer.java

28 lines
978 B
Java

package com.twitter.search.common.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* The majority of the code is copied from Lucene 3.1 analysis.core.WhitespaceAnalyzer. The only
* new code is the getPositionIncrementGap()
*/
public final class SearchWhitespaceAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
return new TokenStreamComponents(new WhitespaceTokenizer());
}
/**
* Make sure that phrase queries do not match across 2 instances of the text field.
*
* See the Javadoc for Analyzer.getPositionIncrementGap() for a good explanation of how this
* method works.
*/
@Override
public int getPositionIncrementGap(String fieldName) {
// Hard-code "text" here, because we can't depend on EarlybirdFieldConstants.
return "text".equals(fieldName) ? 1 : super.getPositionIncrementGap(fieldName);
}
}