// the-algorithm/src/java/com/twitter/search/common/converter/earlybird/EncodedFeatureBuilder.java
// (532 lines, 24 KiB, Java)

package com.twitter.search.common.converter.earlybird;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.common.text.token.TokenizedCharSequence;
import com.twitter.common.text.token.TokenizedCharSequenceStream;
import com.twitter.common.text.util.TokenStreamSerializer;
import com.twitter.common_internal.text.version.PenguinVersion;
import com.twitter.search.common.indexing.thriftjava.Place;
import com.twitter.search.common.indexing.thriftjava.PotentialLocation;
import com.twitter.search.common.indexing.thriftjava.ProfileGeoEnrichment;
import com.twitter.search.common.indexing.thriftjava.ThriftExpandedUrl;
import com.twitter.search.common.indexing.thriftjava.VersionedTweetFeatures;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.relevance.entities.PotentialLocationObject;
import com.twitter.search.common.relevance.entities.TwitterMessage;
import com.twitter.search.common.relevance.features.FeatureSink;
import com.twitter.search.common.relevance.features.MutableFeatureNormalizers;
import com.twitter.search.common.relevance.features.RelevanceSignalConstants;
import com.twitter.search.common.relevance.features.TweetTextFeatures;
import com.twitter.search.common.relevance.features.TweetTextQuality;
import com.twitter.search.common.relevance.features.TweetUserFeatures;
import com.twitter.search.common.schema.base.FeatureConfiguration;
import com.twitter.search.common.schema.base.ImmutableSchemaInterface;
import com.twitter.search.common.schema.earlybird.EarlybirdEncodedFeatures;
import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant;
import com.twitter.search.common.util.lang.ThriftLanguageUtil;
import com.twitter.search.common.util.text.LanguageIdentifierHelper;
import com.twitter.search.common.util.text.NormalizerHelper;
import com.twitter.search.common.util.text.SourceNormalizer;
import com.twitter.search.common.util.text.TokenizerHelper;
import com.twitter.search.common.util.text.TokenizerResult;
import com.twitter.search.common.util.text.TweetTokenStreamSerializer;
import com.twitter.search.common.util.url.LinkVisibilityUtils;
import com.twitter.search.common.util.url.NativeVideoClassificationUtils;
import com.twitter.search.ingester.model.VisibleTokenRatioUtil;
/**
* EncodedFeatureBuilder helps build the encoded features for a TwitterMessage.
*
* Instances are stateful (a single token stream is reused across calls), so an instance should
* only be used for one tweet at a time.
*/
public class EncodedFeatureBuilder {
// Logger used to report serialization failures and unparseable photo URLs.
private static final Logger LOG = LoggerFactory.getLogger(EncodedFeatureBuilder.class);
// Incremented in addPhotoUrl when the status ID captured from a photo URL cannot be parsed
// as a long.
private static final SearchCounter NUM_TWEETS_WITH_INVALID_TWEET_ID_IN_PHOTO_URL =
SearchCounter.export("tweets_with_invalid_tweet_id_in_photo_url");
// TwitterTokenStream for converting TokenizedCharSequence into a stream for serialization
// This is stateful so should only be used one tweet at a time
private final TokenizedCharSequenceStream tokenSeqStream = new TokenizedCharSequenceStream();
// Both patterns below are case-insensitive and anchored to the whole input. They accept an
// optional "http(s)://" and "www." prefix, then "twitter.com/", a 1-20 word-character path
// segment (presumably the screen name), "/status/<digits>/photo/" and optional trailing digits.
// Capturing group 1 is the status ID digits.
// The PERMA_LINK pattern additionally tolerates an optional "?query" part after "twitter.com/"
// and makes the slash after "#" / "#!" optional; the COPY_PASTE pattern requires that slash.
// SUPPRESS CHECKSTYLE:OFF LineLength
private static final Pattern TWITTER_PHOTO_PERMA_LINK_PATTERN =
Pattern.compile("(?i:^(?:(?:https?\\:\\/\\/)?(?:www\\.)?)?twitter\\.com\\/(?:\\?[^#]+)?(?:#!?\\/?)?\\w{1,20}\\/status\\/(\\d+)\\/photo\\/\\d*$)");
private static final Pattern TWITTER_PHOTO_COPY_PASTE_LINK_PATTERN =
Pattern.compile("(?i:^(?:(?:https?\\:\\/\\/)?(?:www\\.)?)?twitter\\.com\\/(?:#!?\\/)?\\w{1,20}\\/status\\/(\\d+)\\/photo\\/\\d*$)");
// SUPPRESS CHECKSTYLE:ON LineLength
// Computes the normalized visible-token percentage of the tweet's token stream
// (see createTweetFeaturesFromTwitterMessage).
private static final VisibleTokenRatioUtil VISIBLE_TOKEN_RATIO = new VisibleTokenRatioUtil();
// One token-stream serialization failure counter per PenguinVersion, exported as
// "tokenstream_serialization_failure_<lower-cased version name>".
private static final Map<PenguinVersion, SearchCounter> SERIALIZE_FAILURE_COUNTERS_MAP =
    Maps.newEnumMap(PenguinVersion.class);
static {
  for (PenguinVersion penguinVersion : PenguinVersion.values()) {
    // Locale.ROOT keeps the exported counter name independent of the JVM default locale
    // (with a Turkish default locale, for example, 'I' would lower-case to a dotless i).
    SERIALIZE_FAILURE_COUNTERS_MAP.put(
        penguinVersion,
        SearchCounter.export(
            "tokenstream_serialization_failure_"
                + penguinVersion.name().toLowerCase(Locale.ROOT)));
  }
}
/**
 * Immutable holder for the three results produced for one tweet: the versioned tweet features
 * plus the encoded and the extended encoded features.
 */
public static class TweetFeatureWithEncodeFeatures {
  /** The versioned tweet features. */
  public final VersionedTweetFeatures versionedFeatures;

  /** The (base) encoded features. */
  public final EarlybirdEncodedFeatures encodedFeatures;

  /** The extended encoded features. */
  public final EarlybirdEncodedFeatures extendedEncodedFeatures;

  /** Stores the given references as-is; no copies are made. */
  public TweetFeatureWithEncodeFeatures(
      VersionedTweetFeatures versionedFeatures,
      EarlybirdEncodedFeatures encodedFeatures,
      EarlybirdEncodedFeatures extendedEncodedFeatures) {
    this.extendedEncodedFeatures = extendedEncodedFeatures;
    this.encodedFeatures = encodedFeatures;
    this.versionedFeatures = versionedFeatures;
  }
}
/**
 * Create tweet text features and the encoded features.
 *
 * <p>This method reuses this instance's token stream, so it must not be called for more than
 * one tweet at a time on the same instance.
 *
 * @param message the tweet message
 * @param penguinVersion the penguin version the features are based on
 * @param schemaSnapshot the schema associated with the features
 * @return the versioned tweet features together with the encoded and extended encoded features
 * @throws NullPointerException if the message has no text features for the given penguin version
 */
public TweetFeatureWithEncodeFeatures createTweetFeaturesFromTwitterMessage(
    TwitterMessage message,
    PenguinVersion penguinVersion,
    ImmutableSchemaInterface schemaSnapshot) {
  VersionedTweetFeatures versionedTweetFeatures = new VersionedTweetFeatures();

  // Write extendedPackedFeatures.
  EarlybirdEncodedFeatures extendedEncodedFeatures =
      createExtendedEncodedFeaturesFromTwitterMessage(message, penguinVersion, schemaSnapshot);
  if (extendedEncodedFeatures != null) {
    extendedEncodedFeatures
        .writeExtendedFeaturesToVersionedTweetFeatures(versionedTweetFeatures);
  }

  setSourceAndNormalizedSource(
      message.getStrippedSource(), versionedTweetFeatures, penguinVersion);

  // Everything below dereferences the text features, so fail with a descriptive message
  // instead of an anonymous NullPointerException.
  TweetTextFeatures textFeatures = Preconditions.checkNotNull(
      message.getTweetTextFeatures(penguinVersion),
      "No tweet text features for penguin version %s", penguinVersion);

  ///////////////////////////////
  // Add hashtags and mentions
  textFeatures.getHashtags().forEach(versionedTweetFeatures::addToHashtags);
  textFeatures.getMentions().forEach(versionedTweetFeatures::addToMentions);

  ///////////////////////////////
  // Extract some extra information from the message text.
  // Index stock symbols with $ prepended
  // Lower-casing uses Locale.ROOT so the indexed term does not depend on the JVM default locale.
  textFeatures.getStocks().stream()
      .filter(stock -> stock != null)
      .forEach(stock -> versionedTweetFeatures.addToStocks(stock.toLowerCase(Locale.ROOT)));

  // Question marks
  versionedTweetFeatures.setHasQuestionMark(textFeatures.hasQuestionMark());
  // Smileys
  versionedTweetFeatures.setHasPositiveSmiley(textFeatures.hasPositiveSmiley());
  versionedTweetFeatures.setHasNegativeSmiley(textFeatures.hasNegativeSmiley());

  TokenStreamSerializer streamSerializer =
      TweetTokenStreamSerializer.getTweetTokenStreamSerializer();
  TokenizedCharSequence tokenSeq = textFeatures.getTokenSequence();

  // The stream is consumed once to compute the visible token ratio, then reset so that it can
  // be consumed again by the serializer below.
  tokenSeqStream.reset(tokenSeq);
  int tokenPercent = VISIBLE_TOKEN_RATIO.extractAndNormalizeTokenPercentage(tokenSeqStream);
  tokenSeqStream.reset(tokenSeq);

  // Write packedFeatures.
  EarlybirdEncodedFeatures encodedFeatures = createEncodedFeaturesFromTwitterMessage(
      message, penguinVersion, schemaSnapshot, tokenPercent);
  encodedFeatures.writeFeaturesToVersionedTweetFeatures(versionedTweetFeatures);

  try {
    versionedTweetFeatures.setTweetTokenStream(streamSerializer.serialize(tokenSeqStream));
    versionedTweetFeatures.setTweetTokenStreamText(tokenSeq.toString());
  } catch (IOException e) {
    // Best effort: record the failure (with its cause) and leave both fields unset.
    LOG.error("TwitterTokenStream serialization error! Could not serialize: "
        + tokenSeq.toString(), e);
    SERIALIZE_FAILURE_COUNTERS_MAP.get(penguinVersion).increment();
    versionedTweetFeatures.unsetTweetTokenStream();
    versionedTweetFeatures.unsetTweetTokenStreamText();
  }

  // User name features
  if (message.getFromUserDisplayName().isPresent()) {
    Locale locale = LanguageIdentifierHelper
        .identifyLanguage(message.getFromUserDisplayName().get());
    String normalizedDisplayName = NormalizerHelper.normalize(
        message.getFromUserDisplayName().get(), locale, penguinVersion);
    TokenizerResult result = TokenizerHelper
        .tokenizeTweet(normalizedDisplayName, locale, penguinVersion);
    tokenSeqStream.reset(result.tokenSequence);
    try {
      versionedTweetFeatures.setUserDisplayNameTokenStream(
          streamSerializer.serialize(tokenSeqStream));
      versionedTweetFeatures.setUserDisplayNameTokenStreamText(result.tokenSequence.toString());
    } catch (IOException e) {
      // Best effort: record the failure (with its cause) and leave both fields unset.
      LOG.error("TwitterTokenStream serialization error! Could not serialize: "
          + message.getFromUserDisplayName().get(), e);
      SERIALIZE_FAILURE_COUNTERS_MAP.get(penguinVersion).increment();
      versionedTweetFeatures.unsetUserDisplayNameTokenStream();
      versionedTweetFeatures.unsetUserDisplayNameTokenStreamText();
    }
  }

  // Resolved URL tokens are stored as a single space-separated string; null tokens are skipped.
  String resolvedUrlsText = Joiner.on(" ").skipNulls().join(textFeatures.getResolvedUrlTokens());
  versionedTweetFeatures.setNormalizedResolvedUrlText(resolvedUrlsText);

  addPlace(message, versionedTweetFeatures, penguinVersion);
  addProfileGeoEnrichment(message, versionedTweetFeatures, penguinVersion);

  versionedTweetFeatures.setTweetSignature(message.getTweetSignature(penguinVersion));

  return new TweetFeatureWithEncodeFeatures(
      versionedTweetFeatures, encodedFeatures, extendedEncodedFeatures);
}
/**
 * Sets the normalized (searchable) source and the source facet value on the given features.
 * Nothing is set when the stripped source is null or empty.
 *
 * @param strippedSource the tweet's stripped source, may be null
 * @param versionedTweetFeatures the features to update
 * @param penguinVersion the penguin version used for normalization
 */
protected static void setSourceAndNormalizedSource(
    String strippedSource,
    VersionedTweetFeatures versionedTweetFeatures,
    PenguinVersion penguinVersion) {
  boolean hasSource = strippedSource != null && !strippedSource.isEmpty();
  if (!hasSource) {
    return;
  }

  // Searchable field: normalized by SourceNormalizer (reportedly replaces whitespace with
  // underscores -- unconfirmed).
  versionedTweetFeatures.setNormalizedSource(
      SourceNormalizer.normalize(strippedSource, penguinVersion));

  // Facet field: simpler, case-preserving normalization in the source's detected language.
  Locale sourceLocale = LanguageIdentifierHelper.identifyLanguage(strippedSource);
  versionedTweetFeatures.setSource(
      NormalizerHelper.normalizeKeepCase(strippedSource, sourceLocale, penguinVersion));
}
/**
 * Registers the given URL on the message as a native photo URL if it is a twitter photo
 * permalink.
 *
 * <p>When the URL matches one of the photo permalink patterns, the status ID captured from it
 * is passed to {@code message.addPhotoUrl(statusId, null)}. If the captured ID does not fit
 * into a long, nothing is added to the message, a warning is logged and a counter is
 * incremented; the method still returns true because the URL has the photo permalink shape.
 *
 * @param message the message to add the photo URL to
 * @param photoPermalink the candidate URL; a null value is treated as "not a photo URL"
 * @return true if the URL is a twitter photo permalink, false otherwise
 */
public static boolean addPhotoUrl(TwitterMessage message, String photoPermalink) {
  if (photoPermalink == null) {
    return false;
  }

  // Try the copy/paste form first and fall back to the permalink form. Each pattern is
  // evaluated at most once. Both patterns contain exactly one (mandatory) capturing group,
  // so a successful match always provides group 1.
  Matcher matcher = TWITTER_PHOTO_COPY_PASTE_LINK_PATTERN.matcher(photoPermalink);
  if (!matcher.matches()) {
    matcher = TWITTER_PHOTO_PERMA_LINK_PATTERN.matcher(photoPermalink);
    if (!matcher.matches()) {
      return false;
    }
  }

  // this is a native photo url which we need to store in a separate field
  String idStr = matcher.group(1);
  // idStr should be a valid tweet ID (and therefore, should fit into a Long), but we have
  // tweets for which idStr is a long sequence of digits that does not fit into a Long.
  try {
    long photoStatusId = Long.parseLong(idStr);
    message.addPhotoUrl(photoStatusId, null);
  } catch (NumberFormatException e) {
    LOG.warn("Found a tweet with a photo URL with an invalid tweet ID: " + message);
    NUM_TWEETS_WITH_INVALID_TWEET_ID_IN_PHOTO_URL.increment();
  }
  return true;
}
/**
 * Sets the tokenized place on the given features when the message has a place ID.
 * The place's full name is normalized and tokenized; the country code, when present, is
 * normalized.
 *
 * @throws NullPointerException if the message has a place ID but no place full name
 */
private void addPlace(TwitterMessage message,
    VersionedTweetFeatures versionedTweetFeatures,
    PenguinVersion penguinVersion) {
  final String id = message.getPlaceId();
  if (id == null) {
    return;
  }

  // Tweet.Place.id and Tweet.Place.full_name are both required fields.
  final String fullName = message.getPlaceFullName();
  Preconditions.checkNotNull(fullName, "Tweet.Place without full_name.");

  final Locale fullNameLocale = LanguageIdentifierHelper.identifyLanguage(fullName);
  final String normalizedFullName =
      NormalizerHelper.normalize(fullName, fullNameLocale, penguinVersion);
  final Place tokenizedPlace = new Place(
      id,
      StringUtils.join(
          TokenizerHelper.tokenizeQuery(normalizedFullName, fullNameLocale, penguinVersion),
          " "));

  final String countryCode = message.getPlaceCountryCode();
  if (countryCode != null) {
    final Locale countryCodeLocale = LanguageIdentifierHelper.identifyLanguage(countryCode);
    tokenizedPlace.setCountryCode(
        NormalizerHelper.normalize(countryCode, countryCodeLocale, penguinVersion));
  }

  versionedTweetFeatures.setTokenizedPlace(tokenizedPlace);
}
/**
 * Converts the message's potential locations to their thrift form and stores them on the
 * given features as the tokenized profile geo enrichment. Does nothing when the message has
 * no potential locations.
 */
private void addProfileGeoEnrichment(TwitterMessage message,
    VersionedTweetFeatures versionedTweetFeatures,
    PenguinVersion penguinVersion) {
  List<PotentialLocationObject> locations = message.getPotentialLocations();
  if (!locations.isEmpty()) {
    List<PotentialLocation> thriftLocations = Lists.newArrayList();
    for (PotentialLocationObject location : locations) {
      PotentialLocation thriftLocation = location.toThriftPotentialLocation(penguinVersion);
      thriftLocations.add(thriftLocation);
    }
    ProfileGeoEnrichment enrichment = new ProfileGeoEnrichment(thriftLocations);
    versionedTweetFeatures.setTokenizedProfileGeoEnrichment(enrichment);
  }
}
/**
 * Returns the encoded features.
 *
 * <p>Collects message flags, text features, text quality, language, user features,
 * engagement counts and the visible token ratio into a {@link FeatureSink}, reads the encoded
 * tweet features field back from it, and finally applies the link-related features.
 *
 * @param message the tweet message
 * @param penguinVersion the penguin version used to look up text/user features
 * @param schema the schema the features are encoded against
 * @param normalizedTokenPercentBucket the value stored as the visible token ratio
 * @return the encoded features
 */
public static EarlybirdEncodedFeatures createEncodedFeaturesFromTwitterMessage(
    TwitterMessage message,
    PenguinVersion penguinVersion,
    ImmutableSchemaInterface schema,
    int normalizedTokenPercentBucket) {
  final FeatureSink featureSink = new FeatureSink(schema);

  // Static features
  featureSink
      .setBooleanValue(EarlybirdFieldConstant.IS_RETWEET_FLAG, message.isRetweet())
      .setBooleanValue(EarlybirdFieldConstant.IS_REPLY_FLAG, message.isReply())
      .setBooleanValue(
          EarlybirdFieldConstant.FROM_VERIFIED_ACCOUNT_FLAG, message.isUserVerified())
      .setBooleanValue(
          EarlybirdFieldConstant.FROM_BLUE_VERIFIED_ACCOUNT_FLAG, message.isUserBlueVerified())
      .setBooleanValue(EarlybirdFieldConstant.IS_SENSITIVE_CONTENT, message.isSensitiveContent());

  // Text features: hashtag/mention counts are capped at the maximum the schema can encode.
  final TweetTextFeatures tweetText = message.getTweetTextFeatures(penguinVersion);
  if (tweetText != null) {
    final FeatureConfiguration hashtagConfig = schema.getFeatureConfigurationByName(
        EarlybirdFieldConstant.NUM_HASHTAGS.getFieldName());
    final FeatureConfiguration mentionConfig = schema.getFeatureConfigurationByName(
        EarlybirdFieldConstant.NUM_MENTIONS.getFieldName());

    featureSink
        .setNumericValue(
            EarlybirdFieldConstant.NUM_HASHTAGS,
            Math.min(tweetText.getHashtagsSize(), hashtagConfig.getMaxValue()))
        .setNumericValue(
            EarlybirdFieldConstant.NUM_MENTIONS,
            Math.min(tweetText.getMentionsSize(), mentionConfig.getMaxValue()))
        .setBooleanValue(
            EarlybirdFieldConstant.HAS_MULTIPLE_HASHTAGS_OR_TRENDS_FLAG,
            TwitterMessage.hasMultipleHashtagsOrTrends(tweetText))
        .setBooleanValue(
            EarlybirdFieldConstant.HAS_TREND_FLAG,
            tweetText.getTrendingTermsSize() > 0);
  }

  // Text quality
  final TweetTextQuality quality = message.getTweetTextQuality(penguinVersion);
  if (quality == null) {
    // we don't have text score, for whatever reason, set to sentinel value so we won't be
    // skipped by scoring function
    featureSink.setNumericValue(EarlybirdFieldConstant.TEXT_SCORE,
        RelevanceSignalConstants.UNSET_TEXT_SCORE_SENTINEL);
  } else {
    featureSink.setNumericValue(EarlybirdFieldConstant.TEXT_SCORE, quality.getTextScore());

    // Note: if json message "possibly_sensitive" flag is set, we consider the tweet
    // sensitive and is currently filtered out in safe search mode via a hacky setup:
    // earlybird does not create _filter_sensitive_content field, only
    // _is_offensive field is created, and used in filter:safe operator
    final boolean offensive =
        quality.hasBoolQuality(TweetTextQuality.BooleanQualityType.OFFENSIVE)
            || quality.hasBoolQuality(TweetTextQuality.BooleanQualityType.OFFENSIVE_USER)
            || quality.hasBoolQuality(TweetTextQuality.BooleanQualityType.SENSITIVE);
    featureSink.setBooleanValue(EarlybirdFieldConstant.IS_OFFENSIVE_FLAG, offensive);

    // A sensitive text quality overrides the message-level sensitivity flag set above.
    if (quality.hasBoolQuality(TweetTextQuality.BooleanQualityType.SENSITIVE)) {
      featureSink.setBooleanValue(EarlybirdFieldConstant.IS_SENSITIVE_CONTENT, true);
    }
  }

  // Language
  if (message.isSetLocale()) {
    featureSink.setNumericValue(EarlybirdFieldConstant.LANGUAGE,
        ThriftLanguageUtil.getThriftLanguageOf(message.getLocale()).getValue());
  }

  // User features
  final TweetUserFeatures user = message.getTweetUserFeatures(penguinVersion);
  if (user != null) {
    featureSink
        .setBooleanValue(EarlybirdFieldConstant.IS_USER_SPAM_FLAG, user.isSpam())
        .setBooleanValue(EarlybirdFieldConstant.IS_USER_NSFW_FLAG, user.isNsfw())
        .setBooleanValue(EarlybirdFieldConstant.IS_USER_BOT_FLAG, user.isBot());
  }

  // User reputation: a sentinel is stored when the message carries no reputation.
  if (message.getUserReputation() == TwitterMessage.DOUBLE_FIELD_NOT_PRESENT) {
    featureSink.setNumericValue(EarlybirdFieldConstant.USER_REPUTATION,
        RelevanceSignalConstants.UNSET_REPUTATION_SENTINEL);
  } else {
    featureSink.setNumericValue(EarlybirdFieldConstant.USER_REPUTATION,
        (byte) message.getUserReputation());
  }

  featureSink.setBooleanValue(EarlybirdFieldConstant.IS_NULLCAST_FLAG, message.getNullcast());

  // Realtime Ingestion does not write engagement features. Updater does that.
  // Counts are therefore only written here when the message carries a positive value.
  if (message.getNumFavorites() > 0) {
    featureSink.setNumericValue(EarlybirdFieldConstant.FAVORITE_COUNT,
        MutableFeatureNormalizers.BYTE_NORMALIZER.normalize(message.getNumFavorites()));
  }
  if (message.getNumRetweets() > 0) {
    featureSink.setNumericValue(EarlybirdFieldConstant.RETWEET_COUNT,
        MutableFeatureNormalizers.BYTE_NORMALIZER.normalize(message.getNumRetweets()));
  }
  if (message.getNumReplies() > 0) {
    featureSink.setNumericValue(EarlybirdFieldConstant.REPLY_COUNT,
        MutableFeatureNormalizers.BYTE_NORMALIZER.normalize(message.getNumReplies()));
  }

  featureSink.setNumericValue(
      EarlybirdFieldConstant.VISIBLE_TOKEN_RATIO, normalizedTokenPercentBucket);

  final EarlybirdEncodedFeatures result =
      (EarlybirdEncodedFeatures) featureSink.getFeaturesForBaseField(
          EarlybirdFieldConstant.ENCODED_TWEET_FEATURES_FIELD.getFieldName());
  updateLinkEncodedFeatures(result, message);
  return result;
}
/**
 * Returns the extended encoded features.
 *
 * <p>Writes the hashtag, mention and stock counts (when the message has text features) and
 * the reference author ID (when present) into a {@link FeatureSink}, then reads the extended
 * encoded tweet features field back from it.
 *
 * @param message the tweet message
 * @param penguinVersion the penguin version used to look up the text features
 * @param schema the schema the features are encoded against
 * @return the extended encoded features
 */
public static EarlybirdEncodedFeatures createExtendedEncodedFeaturesFromTwitterMessage(
    TwitterMessage message,
    PenguinVersion penguinVersion,
    ImmutableSchemaInterface schema) {
  final FeatureSink featureSink = new FeatureSink(schema);

  final TweetTextFeatures tweetText = message.getTweetTextFeatures(penguinVersion);
  if (tweetText != null) {
    setExtendedEncodedFeatureIntValue(featureSink, schema,
        EarlybirdFieldConstant.NUM_HASHTAGS_V2, tweetText.getHashtagsSize());
    setExtendedEncodedFeatureIntValue(featureSink, schema,
        EarlybirdFieldConstant.NUM_MENTIONS_V2, tweetText.getMentionsSize());
    setExtendedEncodedFeatureIntValue(featureSink, schema,
        EarlybirdFieldConstant.NUM_STOCKS, tweetText.getStocksSize());
  }

  Optional<Long> referenceAuthorId = message.getReferenceAuthorId();
  referenceAuthorId.ifPresent(authorId -> setEncodedReferenceAuthorId(featureSink, authorId));

  String extendedFieldName =
      EarlybirdFieldConstant.EXTENDED_ENCODED_TWEET_FEATURES_FIELD.getFieldName();
  return (EarlybirdEncodedFeatures) featureSink.getFeaturesForBaseField(extendedFieldName);
}
/**
 * Updates all URL-related features, based on the values stored in the given message.
 *
 * <p>Covers the link language, the card flag, the link and media-type flags derived from the
 * expanded URL map, the native video flags and the visible-link flag.
 *
 * @param encodedFeatures The features to be updated.
 * @param message The message.
 */
public static void updateLinkEncodedFeatures(
    EarlybirdEncodedFeatures encodedFeatures, TwitterMessage message) {
  if (message.getLinkLocale() != null) {
    encodedFeatures.setFeatureValue(
        EarlybirdFieldConstant.LINK_LANGUAGE,
        ThriftLanguageUtil.getThriftLanguageOf(message.getLinkLocale()).getValue());
  }

  if (message.hasCard()) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_CARD_FLAG);
  }

  // Set HAS_IMAGE HAS_NEWS HAS_VIDEO etc. flags for expanded urls.
  if (message.getExpandedUrlMapSize() > 0) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_LINK_FLAG);
    for (ThriftExpandedUrl expandedUrl : message.getExpandedUrlMap().values()) {
      setMediaTypeFlags(encodedFeatures, expandedUrl);
    }
  }

  Set<String> lastHopUrls = message.getCanonicalLastHopUrls();

  // Union of the expanded URLs and the canonical last-hop URLs.
  Set<String> expandedAndLastHopUrls = new HashSet<>();
  for (ThriftExpandedUrl expandedUrl : message.getExpandedUrls()) {
    expandedAndLastHopUrls.add(expandedUrl.getExpandedUrl());
  }
  expandedAndLastHopUrls.addAll(lastHopUrls);

  // Check both expanded and last hop url for consumer videos as consumer video urls are
  // sometimes redirected to the url of the tweets containing the videos (SEARCH-42612).
  if (NativeVideoClassificationUtils.hasConsumerVideo(expandedAndLastHopUrls)) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_CONSUMER_VIDEO_FLAG);
  }

  // The remaining video classifications only look at the canonical last-hop URLs.
  if (NativeVideoClassificationUtils.hasProVideo(lastHopUrls)) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_PRO_VIDEO_FLAG);
  }
  if (NativeVideoClassificationUtils.hasVine(lastHopUrls)) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_VINE_FLAG);
  }
  if (NativeVideoClassificationUtils.hasPeriscope(lastHopUrls)) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_PERISCOPE_FLAG);
  }

  if (LinkVisibilityUtils.hasVisibleLink(message.getExpandedUrls())) {
    encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_VISIBLE_LINK_FLAG);
  }
}

/**
 * Sets the flag(s) matching the media type of the given expanded URL, if a media type is set.
 *
 * @throws IllegalStateException if the media type is not one of the handled enum values
 */
private static void setMediaTypeFlags(
    EarlybirdEncodedFeatures encodedFeatures, ThriftExpandedUrl url) {
  if (!url.isSetMediaType()) {
    return;
  }
  switch (url.getMediaType()) {
    case NATIVE_IMAGE:
      encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_IMAGE_URL_FLAG);
      encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_NATIVE_IMAGE_FLAG);
      break;
    case IMAGE:
      encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_IMAGE_URL_FLAG);
      break;
    case VIDEO:
      encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_VIDEO_URL_FLAG);
      break;
    case NEWS:
      encodedFeatures.setFlag(EarlybirdFieldConstant.HAS_NEWS_URL_FLAG);
      break;
    case UNKNOWN:
      // No flag for an unknown media type.
      break;
    default:
      throw new IllegalStateException("Unexpected enum value: " + url.getMediaType());
  }
}
/**
 * Writes {@code value} for {@code field} into the sink, capped at the maximum value from the
 * field's feature configuration. Does nothing when the schema does not contain the field.
 */
private static void setExtendedEncodedFeatureIntValue(
    FeatureSink sink,
    ImmutableSchemaInterface schema,
    EarlybirdFieldConstant field,
    int value) {
  if (!schema.hasField(field.getFieldName())) {
    return;
  }
  FeatureConfiguration config = schema.getFeatureConfigurationByName(field.getFieldName());
  sink.setNumericValue(field, Math.min(value, config.getMaxValue()));
}
/**
 * Stores the reference author ID in the sink as two int features: the long is split by
 * {@code LongIntConverter.convertOneLongToTwoInt} into a least significant and a most
 * significant int, each written to its own field.
 */
private static void setEncodedReferenceAuthorId(FeatureSink sink, long referenceAuthorId) {
  final LongIntConverter.IntegerRepresentation halves =
      LongIntConverter.convertOneLongToTwoInt(referenceAuthorId);

  sink.setNumericValue(
      EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_LEAST_SIGNIFICANT_INT,
      halves.leastSignificantInt);
  sink.setNumericValue(
      EarlybirdFieldConstant.REFERENCE_AUTHOR_ID_MOST_SIGNIFICANT_INT,
      halves.mostSignificantInt);
}
}