diff --git a/ann/src/main/java/com/twitter/ann/hnsw/DistancedItemQueue.java b/ann/src/main/java/com/twitter/ann/hnsw/DistancedItemQueue.java index f77f9c2b2..b2809a907 100644 --- a/ann/src/main/java/com/twitter/ann/hnsw/DistancedItemQueue.java +++ b/ann/src/main/java/com/twitter/ann/hnsw/DistancedItemQueue.java @@ -18,7 +18,7 @@ public class DistancedItemQueue implements Iterable> { private final PriorityQueue> queue; private final boolean minQueue; /** - * Creates ontainer for items with their distances. + * Creates container for items with their distances. * * @param origin Origin (reference) point * @param initial Initial list of elements to add in the structure @@ -94,7 +94,7 @@ public class DistancedItemQueue implements Iterable> { } /** - * Dequeue all the elements from queueu with ordering mantained + * Dequeue all the elements from queue with ordering maintained * * @return remove all the elements in the order of the queue i.e min/max queue. */ diff --git a/ann/src/main/java/com/twitter/ann/hnsw/HnswIndex.java b/ann/src/main/java/com/twitter/ann/hnsw/HnswIndex.java index 2f9c91409..7b1c7a0a7 100644 --- a/ann/src/main/java/com/twitter/ann/hnsw/HnswIndex.java +++ b/ann/src/main/java/com/twitter/ann/hnsw/HnswIndex.java @@ -379,7 +379,7 @@ public class HnswIndex { * This will reduce the recall. *

* For a full explanation of locking see this document: http://go/hnsw-locking - * The method returns the closest nearest neighbor (can be used as an enter point) + * The method returns the closest nearest neighbours (can be used as an enter point) */ private T mutuallyConnectNewElement( final T item, @@ -532,7 +532,7 @@ public class HnswIndex { * @param numOfNeighbours Number of neighbours to search for. * @param ef This param controls the accuracy of the search. * Bigger the ef better the accuracy on the expense of latency. - * Keep it atleast number of neighbours to find. + * Keep it at least number of neighbours to find. * @return Neighbours */ public List> searchKnn(final Q query, final int numOfNeighbours, final int ef) { diff --git a/ann/src/main/scala/com/twitter/ann/brute_force/BruteForceIndex.scala b/ann/src/main/scala/com/twitter/ann/brute_force/BruteForceIndex.scala index d737f57b7..4595e3aca 100644 --- a/ann/src/main/scala/com/twitter/ann/brute_force/BruteForceIndex.scala +++ b/ann/src/main/scala/com/twitter/ann/brute_force/BruteForceIndex.scala @@ -105,7 +105,7 @@ object SerializableBruteForceIndex { } /** - * This is a class that wrapps a BruteForceIndex and provides a method for serialization. + * This is a class that wraps a BruteForceIndex and provides a method for serialization. * * @param bruteForceIndex all queries and updates are sent to this index. * @param embeddingInjection injection that can convert embeddings to thrift embeddings. 
diff --git a/ann/src/main/scala/com/twitter/ann/dataflow/offline/ANNIndexBuilderBeamJob.scala b/ann/src/main/scala/com/twitter/ann/dataflow/offline/ANNIndexBuilderBeamJob.scala index 64ab583ab..88623ea15 100644 --- a/ann/src/main/scala/com/twitter/ann/dataflow/offline/ANNIndexBuilderBeamJob.scala +++ b/ann/src/main/scala/com/twitter/ann/dataflow/offline/ANNIndexBuilderBeamJob.scala @@ -229,7 +229,7 @@ object ANNIndexBuilderBeamJob extends ScioBeamJob[ANNOptions] { // Generate Index processedCollection.saveAsCustomOutput( - "Serialise to Disk", + "Serialize to Disk", OutputSink( out, opts.getAlgo.equals("faiss"), diff --git a/ann/src/main/scala/com/twitter/ann/hnsw/TypedHnswIndex.scala b/ann/src/main/scala/com/twitter/ann/hnsw/TypedHnswIndex.scala index 6bf99a61b..d34162a17 100644 --- a/ann/src/main/scala/com/twitter/ann/hnsw/TypedHnswIndex.scala +++ b/ann/src/main/scala/com/twitter/ann/hnsw/TypedHnswIndex.scala @@ -18,7 +18,7 @@ object TypedHnswIndex { * construction, but better index quality. At some point, increasing * ef_construction does not improve the quality of the index. One way to * check if the selection of ef_construction was ok is to measure a recall - * for M nearest neighbor search when ef = ef_constuction: if the recall is + * for M nearest neighbour search when ef = ef_construction: if the recall is * lower than 0.9, than there is room for improvement. * @param maxM The number of bi-directional links created for every new element during construction. * Reasonable range for M is 2-100. Higher M work better on datasets with high @@ -64,7 +64,7 @@ object TypedHnswIndex { * construction, but better index quality. At some point, increasing * ef_construction does not improve the quality of the index. 
One way to * check if the selection of ef_construction was ok is to measure a recall - * for M nearest neighbor search when ef = ef_constuction: if the recall is + * for M nearest neighbour search when ef = ef_construction: if the recall is * lower than 0.9, than there is room for improvement. * @param maxM The number of bi-directional links created for every new element during construction. * Reasonable range for M is 2-100. Higher M work better on datasets with high diff --git a/ann/src/main/scala/com/twitter/ann/scalding/offline/KnnTruthSetGenerator.scala b/ann/src/main/scala/com/twitter/ann/scalding/offline/KnnTruthSetGenerator.scala index 23b064fc3..0a886bf8b 100644 --- a/ann/src/main/scala/com/twitter/ann/scalding/offline/KnnTruthSetGenerator.scala +++ b/ann/src/main/scala/com/twitter/ann/scalding/offline/KnnTruthSetGenerator.scala @@ -12,7 +12,7 @@ import com.twitter.scalding.UniqueID import com.twitter.scalding_internal.job.TwitterExecutionApp /** - * This job reads index embedding data, query embeddings data, and split into index set, query set and true nearest neigbor set + * This job reads index embedding data, query embeddings data, and split into index set, query set and true nearest neighbour set * from query to index. */ object KnnTruthSetGenerator extends TwitterExecutionApp { diff --git a/ann/src/main/scala/com/twitter/ann/scalding/offline/indexbuilder/README.rst b/ann/src/main/scala/com/twitter/ann/scalding/offline/indexbuilder/README.rst index c58e9620a..9aed42301 100644 --- a/ann/src/main/scala/com/twitter/ann/scalding/offline/indexbuilder/README.rst +++ b/ann/src/main/scala/com/twitter/ann/scalding/offline/indexbuilder/README.rst @@ -95,7 +95,7 @@ General arguments (specified as **--profile.{options}**): - **num_dimensions** Dimension of embedding in the input data. An exception will be thrown if any entry does not have a number of dimensions equal to this number. 
- **metric** Distance metric (InnerProduct/Cosine/L2) - **concurrency_level** Specifies how many parallel inserts happen to the index. This should probably be set to the number of cores on the machine. -- **algo** The kind of index you want to ouput. The supported options right now are: +- **algo** The kind of index you want to output. The supported options right now are: 1. **hnsw** (Metric supported: Cosine, L2, InnerProduct) diff --git a/ann/src/main/scala/com/twitter/ann/service/loadtest/LoadTestRecorder.scala b/ann/src/main/scala/com/twitter/ann/service/loadtest/LoadTestRecorder.scala index 0bcdc9be8..f4e759296 100644 --- a/ann/src/main/scala/com/twitter/ann/service/loadtest/LoadTestRecorder.scala +++ b/ann/src/main/scala/com/twitter/ann/service/loadtest/LoadTestRecorder.scala @@ -179,7 +179,7 @@ class InMemoryLoadTestQueryRecorder[T]( latencyHistogram.add(queryLatency.inMicroseconds) counter.incrementAndGet() // Requests are assumed to have started around the time time of the first time record was called - // plus the time it took for that query to hhave completed. + // plus the time it took for that query to have completed. val (elapsedSinceFirstCall, firstQueryLatency) = elapsedTimeFun.get() val durationSoFar = elapsedSinceFirstCall() + firstQueryLatency elapsedTime.set(durationSoFar) diff --git a/ann/src/main/scala/com/twitter/ann/service/query_server/common/BaseQueryIndexServer.scala b/ann/src/main/scala/com/twitter/ann/service/query_server/common/BaseQueryIndexServer.scala index bac537d27..8ab5e9c38 100644 --- a/ann/src/main/scala/com/twitter/ann/service/query_server/common/BaseQueryIndexServer.scala +++ b/ann/src/main/scala/com/twitter/ann/service/query_server/common/BaseQueryIndexServer.scala @@ -24,7 +24,7 @@ abstract class BaseQueryIndexServer extends ThriftServer with Mtls { protected val environment: Flag[String] = flag[String]("environment", "service environment") /** - * Override with method to provide more module to guice. 
+ * Override with method to provide more modules to Guice. */ protected def additionalModules: Seq[Module] diff --git a/ann/src/main/scala/com/twitter/ann/util/IndexBuilderUtils.scala b/ann/src/main/scala/com/twitter/ann/util/IndexBuilderUtils.scala index b0245f48a..83a6c4fda 100644 --- a/ann/src/main/scala/com/twitter/ann/util/IndexBuilderUtils.scala +++ b/ann/src/main/scala/com/twitter/ann/util/IndexBuilderUtils.scala @@ -15,7 +15,7 @@ object IndexBuilderUtils { concurrencyLevel: Int ): Future[Int] = { val count = new AtomicInteger() - // Async stream allows us to procss at most concurrentLevel futures at a time. + // Async stream allows us to process at most concurrentLevel futures at a time. Future.Unit.before { val stream = AsyncStream.fromSeq(embeddings) val appendStream = stream.mapConcurrent(concurrencyLevel) { annEmbedding => diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala index ecf0bb98e..d43763197 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala @@ -57,7 +57,7 @@ class UtegTweetCandidateGenerator @Inject() ( * supported by the any existing Candidate type, so we created TweetWithScoreAndSocialProof * instead. * - * However, filters and light ranker expect Candidate-typed param to work. In order to minimise the + * However, filters and light ranker expect Candidate-typed param to work. In order to minimize the * changes to them, we are doing conversions from/to TweetWithScoreAndSocialProof to/from Candidate * in this method. 
*/ diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala index 879c96b66..e39050395 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala @@ -59,7 +59,7 @@ case class SimilarityEngineInfo( * * @param sourceInfoOpt - this is optional as many consumerBased CG does not have a source * @param similarityEngineInfo - the similarity engine used in Candidate Generation (eg., TweetBasedUnifiedSE). It can be an atomic SE or an composite SE - * @param contributingSimilarityEngines - only composite SE will have it (e.g., SANNN, UTG). Otherwise it is an empty Seq. All contributing SEs mst be atomic + * @param contributingSimilarityEngines - only composite SE will have it (e.g., SANN, UTG). Otherwise it is an empty Seq. All contributing SEs must be atomic */ case class CandidateGenerationInfo( sourceInfoOpt: Option[SourceInfo], diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala index 26db7898b..6d9224a35 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala @@ -45,7 +45,7 @@ object ModelConfig { val DebuggerDemo: String = "DebuggerDemo" // ColdStartLookalike - this is not really a model name, it is as a placeholder to - // indicate ColdStartLookalike candidate source, which is currently being pluged into + // indicate ColdStartLookalike candidate source, which is currently being plugged into // CustomizedRetrievalCandidateGeneration temporarily. 
val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707" diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala index 599704fa7..a4e77af5d 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala @@ -63,7 +63,7 @@ case class WalsStats(scope: String, scopedStats: StatsReceiver) { } // StatsMap maintains a mapping from Model's input signature to a stats receiver -// The Wals model suports multiple input signature which can run different graphs internally and +// The Wals model supports multiple input signature which can run different graphs internally and // can have a different performance profile. // Invoking StatsReceiver.stat() on each request can create a new stat object and can be expensive // in performance critical paths. 
diff --git a/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/recent_engagement/RepeatedProfileVisitsSource.scala b/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/recent_engagement/RepeatedProfileVisitsSource.scala index c4b4aa3e7..8f5945cb4 100644 --- a/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/recent_engagement/RepeatedProfileVisitsSource.scala +++ b/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/recent_engagement/RepeatedProfileVisitsSource.scala @@ -105,7 +105,7 @@ class RepeatedProfileVisitsSource @Inject() ( val recommendationThreshold = params.getInt(RepeatedProfileVisitsParams.RecommendationThreshold) val bucketingThreshold = params.getInt(RepeatedProfileVisitsParams.BucketingThreshold) - // Get the list of repeatedly visited profilts. Only keep accounts with >= bucketingThreshold visits. + // Get the list of repeatedly visited profiles. Only keep accounts with >= bucketingThreshold visits. 
val repeatedVisitedAccountsStitch: Stitch[Map[Long, Int]] = getRepeatedVisitedAccounts(params, userId).map(_.filter(kv => kv._2 >= bucketingThreshold)) diff --git a/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/rankers/weighted_candidate_source_ranker/WeightedCandidateSourceRanker.scala b/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/rankers/weighted_candidate_source_ranker/WeightedCandidateSourceRanker.scala index c6f55adbc..02fd3e2bb 100644 --- a/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/rankers/weighted_candidate_source_ranker/WeightedCandidateSourceRanker.scala +++ b/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/rankers/weighted_candidate_source_ranker/WeightedCandidateSourceRanker.scala @@ -54,7 +54,7 @@ class WeightedCandidateSourceRanker[Target <: HasParams]( // Note 1: Using map instead mapValue here since mapValue somehow caused infinite loop when used as part of Stream. val sortAndShuffledCandidates = input.map { case (source, candidates) => - // Note 2: toList is required here since candidates is a view, and it will result in infinit loop when used as part of Stream. + // Note 2: toList is required here since candidates is a view, and it will result in infinite loop when used as part of Stream. 
// Note 3: there is no real sorting logic here, it assumes the input is already sorted by candidate sources val sortedCandidates = candidates.toList source -> shuffleFn(sortedCandidates).iterator diff --git a/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms/weighted_sampling/SamplingTransformParams.scala b/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms/weighted_sampling/SamplingTransformParams.scala index 363487a9b..4944cc47f 100644 --- a/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms/weighted_sampling/SamplingTransformParams.scala +++ b/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms/weighted_sampling/SamplingTransformParams.scala @@ -5,7 +5,7 @@ import com.twitter.timelines.configapi.FSParam object SamplingTransformParams { - case object TopKFixed // indicates how many of the fisrt K who-to-follow recommendations are reserved for the candidates with largest K CandidateUser.score where these candidates are sorted in decreasing order of score + case object TopKFixed // indicates how many of the first K who-to-follow recommendations are reserved for the candidates with largest K CandidateUser.score where these candidates are sorted in decreasing order of score extends FSBoundedParam[Int]( name = "post_nux_ml_flow_weighted_sampling_top_k_fixed", default = 0, diff --git a/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/flows/post_nux_ml/PostNuxMlCombinedRankerBuilder.scala b/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/flows/post_nux_ml/PostNuxMlCombinedRankerBuilder.scala index 14e982a41..4ea6d56a7 100644 --- a/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/flows/post_nux_ml/PostNuxMlCombinedRankerBuilder.scala +++ 
b/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/flows/post_nux_ml/PostNuxMlCombinedRankerBuilder.scala @@ -29,7 +29,7 @@ import com.twitter.timelines.configapi.HasParams * - truncating to the top N merged results for ranking * - ML ranker * - Interleaving ranker for producer-side experiments - * - impression-based fatigueing + * - impression-based fatiguing */ @Singleton class PostNuxMlCombinedRankerBuilder[ diff --git a/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/logging/FrsLogger.scala b/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/logging/FrsLogger.scala index 8b920c556..6cfb5e69e 100644 --- a/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/logging/FrsLogger.scala +++ b/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/logging/FrsLogger.scala @@ -125,7 +125,7 @@ object FrsLogger { /** The id of the current user. When the user is logged out, this method should return None. 
*/ override val userId: Option[Long] = clientContext.userId - /** The id of the guest, which is present in logged-in or loged-out states */ + /** The id of the guest, which is present in logged-in or logged-out states */ override val guestId: Option[Long] = clientContext.guestId /** The personalization id (pid) of the user, used to personalize Twitter services */ diff --git a/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/modules/FlagsModule.scala b/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/modules/FlagsModule.scala index f8ff5ae94..f232b4996 100644 --- a/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/modules/FlagsModule.scala +++ b/follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/modules/FlagsModule.scala @@ -8,7 +8,7 @@ object FlagsModule extends TwitterModule { ) flag[Boolean]( name = "interests_opt_out_prod_enabled", - help = "Whether to fetch intersts opt out data from the prod strato column or not" + help = "Whether to fetch interests opt out data from the prod strato column or not" ) flag[Boolean]( name = "log_results", diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/model/ClearCacheIncludeInstruction.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/model/ClearCacheIncludeInstruction.scala index 85154e55b..1b80ac069 100644 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/model/ClearCacheIncludeInstruction.scala +++ b/home-mixer/server/src/main/scala/com/twitter/home_mixer/model/ClearCacheIncludeInstruction.scala @@ -13,7 +13,7 @@ import com.twitter.timelines.configapi.FSParam /** * Include a clear cache timeline instruction when we satisfy these criteria: * - Request Provenance is "pull to refresh" - * - Atleast N non-ad tweet entries in the response + * - At least N non-ad tweet entries in the response * * This is to ensure that we have 
sufficient new content to justify jumping users to the * top of the new timelines response and don't add unnecessary load to backend systems diff --git a/navi/dr_transform/src/converter.rs b/navi/dr_transform/src/converter.rs index 3097aedc0..da5a70052 100644 --- a/navi/dr_transform/src/converter.rs +++ b/navi/dr_transform/src/converter.rs @@ -25,7 +25,7 @@ pub fn log_feature_match( dr_type: String, ) { // Note the following algorithm matches features from config using linear search. - // Also the record source is MinDataRecord. This includes only binary and continous features for now. + // Also the record source is MinDataRecord. This includes only binary and continuous features for now. for (feature_id, feature_value) in dr.continuous_features.as_ref().unwrap() { debug!( @@ -303,7 +303,7 @@ impl BatchPredictionRequestToTorchTensorConverter { } // Todo : Refactor, create a generic version with different type and field accessors - // Example paramterize and then instiantiate the following + // Example parametrize and then instantiate the following // (FLOAT --> FLOAT, DataRecord.continuous_feature) // (BOOL --> INT64, DataRecord.binary_feature) // (INT64 --> INT64, DataRecord.discrete_feature) diff --git a/src/java/com/twitter/search/common/encoding/features/SingleBytePositiveFloatUtil.java b/src/java/com/twitter/search/common/encoding/features/SingleBytePositiveFloatUtil.java index 2894241e8..06a9f0b38 100644 --- a/src/java/com/twitter/search/common/encoding/features/SingleBytePositiveFloatUtil.java +++ b/src/java/com/twitter/search/common/encoding/features/SingleBytePositiveFloatUtil.java @@ -77,7 +77,7 @@ public final class SingleBytePositiveFloatUtil { // Table used for converting mantissa into a significant private static float[] mantissaToFractionTable = { - // Decimal Matisa value + // Decimal Mantissa value STEP_SIZE * 0, // 0000 STEP_SIZE * 1, // 0001 STEP_SIZE * 1, // 0010 diff --git 
a/src/java/com/twitter/search/common/relevance/entities/TwitterMessageUtil.java b/src/java/com/twitter/search/common/relevance/entities/TwitterMessageUtil.java index 7437de7fd..0d42a1fb2 100644 --- a/src/java/com/twitter/search/common/relevance/entities/TwitterMessageUtil.java +++ b/src/java/com/twitter/search/common/relevance/entities/TwitterMessageUtil.java @@ -399,7 +399,7 @@ public final class TwitterMessageUtil { * * @param text The text to truncate * @param maxLength The maximum length of the string after truncation - * @param field The field from which this string cames + * @param field The field from which this string came * @param splitEmojisAtMaxLength If true, don't worry about emojis and just truncate at maxLength, * potentially splitting them. If false, truncate before the emoji if truncating at maxLength * would cause the emoji to be split. diff --git a/src/java/com/twitter/search/common/relevance/features/MutableFeatureNormalizers.java b/src/java/com/twitter/search/common/relevance/features/MutableFeatureNormalizers.java index b44414ea3..c6f511736 100644 --- a/src/java/com/twitter/search/common/relevance/features/MutableFeatureNormalizers.java +++ b/src/java/com/twitter/search/common/relevance/features/MutableFeatureNormalizers.java @@ -13,7 +13,7 @@ public abstract class MutableFeatureNormalizers { // value (255, if using a byte). private static final int MAX_COUNTER_VALUE_SUPPORTED = 50000000; - // Avoid using this normalizer for procesing any new data, always use SmartIntegerNormalizer + // Avoid using this normalizer for processing any new data, always use SmartIntegerNormalizer // below. 
public static final SingleBytePositiveFloatNormalizer BYTE_NORMALIZER = new SingleBytePositiveFloatNormalizer(); diff --git a/src/java/com/twitter/search/common/relevance/features/TweetEngagementFeatures.java b/src/java/com/twitter/search/common/relevance/features/TweetEngagementFeatures.java index 22b610e4c..a6c8b8a85 100644 --- a/src/java/com/twitter/search/common/relevance/features/TweetEngagementFeatures.java +++ b/src/java/com/twitter/search/common/relevance/features/TweetEngagementFeatures.java @@ -4,7 +4,7 @@ import com.twitter.search.common.encoding.features.EncodedFeatures; /** * Holds engagement features for a particular tweet and encodes them as a single int. - * The features are: retweet count, favorite count, itweet score, reply count. + * The features are: retweet count, favorite count, tweet score, reply count. */ public class TweetEngagementFeatures extends EncodedFeatures { private static final int RETWEET_COUNT_BIT_SHIFT = 0; diff --git a/src/java/com/twitter/search/common/relevance/text/TweetParser.java b/src/java/com/twitter/search/common/relevance/text/TweetParser.java index df518ba5f..ae979003f 100644 --- a/src/java/com/twitter/search/common/relevance/text/TweetParser.java +++ b/src/java/com/twitter/search/common/relevance/text/TweetParser.java @@ -133,7 +133,7 @@ public class TweetParser { TokenizerResult result, PenguinVersion penguinVersion) { if (message.getHashtags().isEmpty()) { - // add hashtags to TwitterMessage if it doens't already have them, from + // add hashtags to TwitterMessage if it doesn't already have them, from // JSON entities, this happens when we do offline indexing for (String hashtag : sanitizeTokenizerResults(result.hashtags, '#')) { message.addHashtag(hashtag); @@ -141,7 +141,7 @@ public class TweetParser { } if (message.getMentions().isEmpty()) { - // add mentions to TwitterMessage if it doens't already have them, from + // add mentions to TwitterMessage if it doesn't already have them, from // JSON entities, this 
happens when we do offline indexing for (String mention : sanitizeTokenizerResults(result.mentions, '@')) { message.addMention(mention); diff --git a/src/java/com/twitter/search/common/search/TerminationTracker.java b/src/java/com/twitter/search/common/search/TerminationTracker.java index 916415078..bf0974d85 100644 --- a/src/java/com/twitter/search/common/search/TerminationTracker.java +++ b/src/java/com/twitter/search/common/search/TerminationTracker.java @@ -32,7 +32,7 @@ public class TerminationTracker { private final int postTerminationOverheadMillis; // We don't check for early termination often enough. Some times requests timeout in between - // early termination checks. This buffer time is also substracted from deadline. + // early termination checks. This buffer time is also subtracted from deadline. // To illustrate how this is used, let's use a simple example: // If we spent 750ms searching 5 segments, a rough estimate is that we need 150ms to search // one segment. If the timeout is set to 800ms, we should not starting searching the next segment. diff --git a/src/java/com/twitter/search/common/util/earlybird/FacetsResultsUtils.java b/src/java/com/twitter/search/common/util/earlybird/FacetsResultsUtils.java index 43d5732e4..df8c88a18 100644 --- a/src/java/com/twitter/search/common/util/earlybird/FacetsResultsUtils.java +++ b/src/java/com/twitter/search/common/util/earlybird/FacetsResultsUtils.java @@ -54,7 +54,7 @@ public final class FacetsResultsUtils { /** * Prepare facet fields with empty entries and check if we need termStats for filtering. - * Returns true if termStats filtering is needed (thus the termStats servie call). + * Returns true if termStats filtering is needed (thus the termStats service call). * @param facetRequest The related facet request. * @param facetFieldInfoMap The facet field info map to fill, a map from facet type to the facet * fiels results info. 
diff --git a/src/java/com/twitter/search/common/util/ml/prediction_engine/BaseModelBuilder.java b/src/java/com/twitter/search/common/util/ml/prediction_engine/BaseModelBuilder.java index 2d4d539ee..a25fc2a17 100644 --- a/src/java/com/twitter/search/common/util/ml/prediction_engine/BaseModelBuilder.java +++ b/src/java/com/twitter/search/common/util/ml/prediction_engine/BaseModelBuilder.java @@ -53,7 +53,7 @@ public abstract class BaseModelBuilder implements ModelBuilder { *

* Model name (Generated by ML API, but ignored by this class) * Feature definition: - * Name of the feature or definition from the MDL discretizer. + * Name of the feature or definition from the MDL discretizer * Weight: * Weight of the feature using LOGIT scale. *

diff --git a/src/java/com/twitter/search/common/util/ml/prediction_engine/DiscretizedFeatureRange.java b/src/java/com/twitter/search/common/util/ml/prediction_engine/DiscretizedFeatureRange.java index 725009ab0..454b698c9 100644 --- a/src/java/com/twitter/search/common/util/ml/prediction_engine/DiscretizedFeatureRange.java +++ b/src/java/com/twitter/search/common/util/ml/prediction_engine/DiscretizedFeatureRange.java @@ -3,7 +3,7 @@ package com.twitter.search.common.util.ml.prediction_engine; import com.google.common.base.Preconditions; /** - * The discretized value range for a continous feature. After discretization a continuous feature + * The discretized value range for a continuous feature. After discretization a continuous feature * may become multiple discretized binary features, each occupying a range. This class stores this * range and a weight for it. */ diff --git a/src/java/com/twitter/search/common/util/ml/prediction_engine/LightweightLinearModel.java b/src/java/com/twitter/search/common/util/ml/prediction_engine/LightweightLinearModel.java index 57324120b..2839a7ed5 100644 --- a/src/java/com/twitter/search/common/util/ml/prediction_engine/LightweightLinearModel.java +++ b/src/java/com/twitter/search/common/util/ml/prediction_engine/LightweightLinearModel.java @@ -20,7 +20,7 @@ import com.twitter.search.common.file.AbstractFile; * * - Only linear models are supported. * - Only binary and continuous features (i.e. it doesn't support discrete/categorical features). - * - It supports the MDL discretizer (but not the one based on trees). + * - It supports the MDL discretizer (but not the one based on trees). * - It doesn't support feature crossings. 
* * Instances of this class should be created using only the load methods (loadFromHdfs and diff --git a/src/java/com/twitter/search/common/util/ml/prediction_engine/ModelLoader.java b/src/java/com/twitter/search/common/util/ml/prediction_engine/ModelLoader.java index 7809161b0..1f985522a 100644 --- a/src/java/com/twitter/search/common/util/ml/prediction_engine/ModelLoader.java +++ b/src/java/com/twitter/search/common/util/ml/prediction_engine/ModelLoader.java @@ -60,7 +60,7 @@ public class ModelLoader implements Runnable { * ${counterPrefix}_num_models: * Number of models currently loaded. * ${counterPrefix}_num_loads: - * Number of succesful model loads. + * Number of successful model loads. * ${counterPrefix}_num_errors: * Number of errors occurred while loading the models. */ diff --git a/src/java/com/twitter/search/earlybird/partition/EarlybirdIndexLoader.java b/src/java/com/twitter/search/earlybird/partition/EarlybirdIndexLoader.java index 1806bd106..444b6e132 100644 --- a/src/java/com/twitter/search/earlybird/partition/EarlybirdIndexLoader.java +++ b/src/java/com/twitter/search/earlybird/partition/EarlybirdIndexLoader.java @@ -156,7 +156,7 @@ public class EarlybirdIndexLoader { FlushInfo segmentsFlushInfo = indexInfo.getSubProperties(EarlybirdIndexFlusher.SEGMENTS); List segmentNames = Lists.newArrayList(segmentsFlushInfo.getKeyIterator()); - // This should only happen if you're running in stagingN and loading a prod index through + // This should only happen if you're running in staging and loading a prod index through // the read_index_from_prod_location flag. In this case, we point to a directory that has // a lot more than the number of segments we want in staging and we trim this list to the // desired number. 
diff --git a/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java b/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java index f91d2d3c4..a8258bbd5 100644 --- a/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java +++ b/src/java/com/twitter/search/earlybird_root/common/EarlybirdFeatureSchemaMerger.java @@ -94,7 +94,7 @@ public class EarlybirdFeatureSchemaMerger { * @param searchResults the response * @param requestContext the request, which should record the client cached feature schemas * @param statPrefix the stats prefix string - * @param successfulResponses all successfull responses from downstream + * @param successfulResponses all successful responses from downstream */ public void collectAndSetFeatureSchemaInResponse( ThriftSearchResults searchResults, @@ -149,7 +149,7 @@ public class EarlybirdFeatureSchemaMerger { * (This is done inside superroot) * @param requestContext the search request context * @param mergedResponse the merged result inside the superroot - * @param realtimeResponse the realtime tier resposne + * @param realtimeResponse the realtime tier response * @param protectedResponse the protected tier response * @param fullArchiveResponse the full archive tier response * @param statsPrefix diff --git a/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java b/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java index 0df98b34e..bba6cb75b 100644 --- a/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java +++ b/src/java/com/twitter/search/earlybird_root/common/QueryParsingUtils.java @@ -43,7 +43,7 @@ public final class QueryParsingUtils { * * @param request the earlybird request to parse. * @return null if the request does not specify a serialized query. - * @throws QueryParserException if querry parsing fails. + * @throws QueryParserException if query parsing fails. 
*/ @Nullable static Query getParsedQuery(EarlybirdRequest request) throws QueryParserException { diff --git a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeRangeFilter.java b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeRangeFilter.java index bd5eda6de..5c846acbe 100644 --- a/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeRangeFilter.java +++ b/src/java/com/twitter/search/earlybird_root/filters/EarlybirdTimeRangeFilter.java @@ -131,7 +131,7 @@ public class EarlybirdTimeRangeFilter extends // As long as a query overlaps with the tier serving range on either side, // the request is not filtered. I.e. we want to be conservative when doing this filtering, // because it is just an optimization. We ignore the inclusiveness / exclusiveness of the - // boundaries. If the tier boundary and the query boundry happen to be the same, we do not + // boundaries. If the tier boundary and the query boundary happen to be the same, we do not // filter the request. return queryRanges.getSinceIDExclusive().or(0L) > servingRange.getServingRangeMaxId() diff --git a/src/java/com/twitter/search/earlybird_root/mergers/EarlybirdResponseMerger.java b/src/java/com/twitter/search/earlybird_root/mergers/EarlybirdResponseMerger.java index e52e70b29..5571adeac 100644 --- a/src/java/com/twitter/search/earlybird_root/mergers/EarlybirdResponseMerger.java +++ b/src/java/com/twitter/search/earlybird_root/mergers/EarlybirdResponseMerger.java @@ -138,7 +138,7 @@ public abstract class EarlybirdResponseMerger implements EarlyTerminateTierMerge // thread_running_future_{i-1} and thread_running_future_i is crossed. This guarantees // that thread_running_future_i will see the updates to mergeHelper before it sees the // callbacks. 
(Or thread_running_future_{i-1} == thread_running_future_i, in which case - // synchronization is not an issue, and correctness is guarateed by the order in which + // synchronization is not an issue, and correctness is guaranteed by the order in which // things will run.) // 4. The same reasoning applies to currentFutureIndex. diff --git a/src/java/com/twitter/search/earlybird_root/mergers/RecencyResponseMerger.java b/src/java/com/twitter/search/earlybird_root/mergers/RecencyResponseMerger.java index bc4742493..f7e634c4a 100644 --- a/src/java/com/twitter/search/earlybird_root/mergers/RecencyResponseMerger.java +++ b/src/java/com/twitter/search/earlybird_root/mergers/RecencyResponseMerger.java @@ -481,8 +481,8 @@ public class RecencyResponseMerger extends EarlybirdResponseMerger { /** * Trim results based on search range. The search range [x, y] is determined by: - * x is the maximun of the minimun search IDs; - * y is the minimun of the maximum search IDs. + * x is the maximum of the minimum search IDs; + * y is the minimum of the maximum search IDs. * * Ids out side of this range are removed. * If we do not get enough results after the removal, we add IDs back until we get enough results. diff --git a/src/java/com/twitter/search/earlybird_root/mergers/StrictRecencyResponseMerger.java b/src/java/com/twitter/search/earlybird_root/mergers/StrictRecencyResponseMerger.java index 4ea72717e..3e7678bf1 100644 --- a/src/java/com/twitter/search/earlybird_root/mergers/StrictRecencyResponseMerger.java +++ b/src/java/com/twitter/search/earlybird_root/mergers/StrictRecencyResponseMerger.java @@ -212,7 +212,7 @@ public class StrictRecencyResponseMerger extends RecencyResponseMerger { // We don't need to worry about the tier bottom when merging partition responses in the full // archive cluster: if all partitions were exhausted and we didn't trim the results, then // the early-terminated flag on the merged response will be false. 
If at least one partition - // is early-terminated, or we trimmed some results, then the ealry-terminated flag on the + // is early-terminated, or we trimmed some results, then the early-terminated flag on the // merged response will be true, and we should continue getting results from this tier before // we move to the next one. return false; diff --git a/src/java/com/twitter/search/feature_update_service/modules/FuturePoolModule.java b/src/java/com/twitter/search/feature_update_service/modules/FuturePoolModule.java index 537f67559..9a8430171 100644 --- a/src/java/com/twitter/search/feature_update_service/modules/FuturePoolModule.java +++ b/src/java/com/twitter/search/feature_update_service/modules/FuturePoolModule.java @@ -32,7 +32,7 @@ public class FuturePoolModule extends TwitterModule { /** * Create a future pool backed by executor service, with bounded thread pool and bounded backing - * queue. ONLY VISIBILE FOR TESTING; don't invoke outside this class. + * queue. ONLY VISIBLE FOR TESTING; don't invoke outside this class. */ @VisibleForTesting public static ExecutorServiceFuturePool createFuturePool( diff --git a/src/java/com/twitter/search/feature_update_service/modules/TweetypieModule.java b/src/java/com/twitter/search/feature_update_service/modules/TweetypieModule.java index 6fd041cd4..6b997f5b1 100644 --- a/src/java/com/twitter/search/feature_update_service/modules/TweetypieModule.java +++ b/src/java/com/twitter/search/feature_update_service/modules/TweetypieModule.java @@ -36,7 +36,7 @@ public class TweetypieModule extends TwitterModule { ThriftMux.Client thriftMux, StatsReceiver statsReceiver) throws InterruptedException { // TweetService is TweetService (tweetypie) with different api - // Since TweetService will be primarly used for interacting with + // Since TweetService will be primarily used for interacting with // tweetypie's flexible schema (MH), we will increase request // timeout and retries but share other settings from TweetService. 
@SuppressWarnings("unchecked") diff --git a/src/java/com/twitter/search/feature_update_service/whitelist/ClientIdWhitelist.java b/src/java/com/twitter/search/feature_update_service/whitelist/ClientIdWhitelist.java index 4718c547e..5d8c40497 100644 --- a/src/java/com/twitter/search/feature_update_service/whitelist/ClientIdWhitelist.java +++ b/src/java/com/twitter/search/feature_update_service/whitelist/ClientIdWhitelist.java @@ -44,7 +44,7 @@ public class ClientIdWhitelist extends PeriodicFileLoader { /** * Creates clock and executor service needed to create a periodic file loading object - * then returns object that accpets file. + * then returns object that accepts file. * @param clientWhitelistPath * @return ClientIdWhitelist * @throws Exception diff --git a/src/java/com/twitter/search/ingester/README.md b/src/java/com/twitter/search/ingester/README.md index ee0a2b15a..f7a93d999 100644 --- a/src/java/com/twitter/search/ingester/README.md +++ b/src/java/com/twitter/search/ingester/README.md @@ -5,6 +5,6 @@ There are two types of ingesters: 1. Tweet ingesters 2. UserUpdates ingesters -Tweet ingesters consume raw tweets and extract different fields and features for Earlybird to index. User updates ingester produces user safety information such as whether the user is deactivated, suspended or off-boarded. The user and tweet features produced by ingesters are then used by Earlybird during tweet retieval and ranking. +Tweet ingesters consume raw tweets and extract different fields and features for Earlybird to index. User updates ingester produces user safety information such as whether the user is deactivated, suspended or off-boarded. The user and tweet features produced by ingesters are then used by Earlybird during tweet retrieval and ranking. Ingesters are made up of a pipeline of stages with each stage performing a different field/feature extraction. 
The pipeline configuration of the ingesters can be found at science/search/ingester/config diff --git a/src/java/com/twitter/search/ingester/pipeline/twitter/TwitterBatchedBaseStage.java b/src/java/com/twitter/search/ingester/pipeline/twitter/TwitterBatchedBaseStage.java index fda5b6166..b4a510da0 100644 --- a/src/java/com/twitter/search/ingester/pipeline/twitter/TwitterBatchedBaseStage.java +++ b/src/java/com/twitter/search/ingester/pipeline/twitter/TwitterBatchedBaseStage.java @@ -79,7 +79,7 @@ public abstract class TwitterBatchedBaseStage extends protected abstract boolean needsToBeBatched(T element); /** - * Tranform from type T to U element. + * Transform from type T to U element. * T and U might be different types so this function will help with the transformation * if the incoming T element is filtered out and is bypass directly to the next stage * that takes incoming objects of type U