Fix additional typos in various comments/docs

2025-03-12 22:36:35 +01:00 · 2023-03-31 15:25:04 -04:00 · 2023-03-31 15:25:04 -04:00 · cb1ff279f2
commit cb1ff279f2
parent 974d6458af
10 changed files with 22 additions and 22 deletions
--- a/common/checkpointing/snapshot.py
+++ b/common/checkpointing/snapshot.py
@@ -101,7 +101,7 @@ class Snapshot:
    weight_tensor,
  ) -> None:
    """Loads pretrained embedding from the snapshot to the model.
-       Utilise partial lodaing meachanism from torchsnapshot.
+       Utilise partial loading mechanism from torchsnapshot.
    Args:
      embedding_snapshot: Path to the snapshot containing pretrained embeddings (EBC).
      snapshot_emb_name: Name of the layer in the *snapshot* model, containing the EBC.
--- a/core/config/config_load.py
+++ b/core/config/config_load.py
@@ -11,7 +11,7 @@ def load_config_from_yaml(config_type: Type[BaseConfig], yaml_path: str):
  """Recommend method to load a config file (a yaml file) and parse it.

  Because we have a shared filesystem the recommended route to running jobs it put modified config
-  files with the desired parameters somewhere on the filesytem and run jobs pointing to them.
+  files with the desired parameters somewhere on the filesystem and run jobs pointing to them.
  """

  def _substitute(s):
--- a/core/custom_training_loop.py
+++ b/core/custom_training_loop.py
@@ -28,7 +28,7 @@ import torchmetrics as tm

 def get_new_iterator(iterable: Iterable):
  """
-  This obtain a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
+  This obtains a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
   getting a new iterator each N steps will avoid memory leak. To avoid the memory leak
   calling iter(iterable) should return a "fresh" iterator using a fresh
   (new instance of) tf.data.Iterator.
@@ -115,7 +115,7 @@ def train(
    dataset: data iterator for the training set
    evaluation_iterators: data iterators for the different evaluation sets
    scheduler: optional learning rate scheduler
-    output_transform_for_metrics: optional transformation functions to transorm the model
+    output_transform_for_metrics: optional transformation functions to transform the model
                                  output and labels into a format the metrics can understand
  """

--- a/core/debug_training_loop.py
+++ b/core/debug_training_loop.py
@@ -1,6 +1,6 @@
 """This is a very limited feature training loop useful for interactive debugging.

-It is not intended for actual model tranining (it is not fast, doesn't compile the model).
+It is not intended for actual model training (it is not fast, doesn't compile the model).
 It does not support checkpointing.

 suggested use:
--- a/projects/home/recap/FEATURES.md
+++ b/projects/home/recap/FEATURES.md
@@ -73,7 +73,7 @@ author (real_time)
 </td>
 <td>
 <code>
-timelines.enagagement.is_retweeted_without_quote <br>
+timelines.engagement.is_retweeted_without_quote <br>
 timelines.engagement.is_clicked <br>
 timelines.engagement.is_dont_like <br>
 timelines.engagement.is_dwelled <br>
@@ -112,7 +112,7 @@ original_author (real_time)
 </td>
 <td>
 <code>
-timelines.enagagement.is_retweeted_without_quote <br>
+timelines.engagement.is_retweeted_without_quote <br>
 timelines.engagement.is_clicked <br>
 timelines.engagement.is_dont_like <br>
 timelines.engagement.is_dwelled <br>
@@ -544,7 +544,7 @@ user (real_time)
 </td>
 <td>
 <code>
-timelines.enagagement.is_retweeted_without_quote<br>
+timelines.engagement.is_retweeted_without_quote<br>
 timelines.engagement.is_clicked<br>
 timelines.engagement.is_dont_like<br>
 timelines.engagement.is_dwelled<br>
@@ -585,7 +585,7 @@ user (48h_real_time_v5)
 </td>
 <td>
 <code>
-timelines.enagagement.is_retweeted_without_quote<br>
+timelines.engagement.is_retweeted_without_quote<br>
 timelines.engagement.is_clicked<br>
 timelines.engagement.is_dont_like<br>
 timelines.engagement.is_dwelled<br>
@@ -1422,7 +1422,7 @@ topic (real_time)
 </td>
 <td>
 <code>
-timelines.enagagement.is_retweeted_without_quote <br>
+timelines.engagement.is_retweeted_without_quote <br>
 timelines.engagement.is_clicked <br>
 timelines.engagement.is_dont_like <br>
 timelines.engagement.is_dwelled <br>
@@ -1460,7 +1460,7 @@ topic (24_hour_real_time)
 </code>
 </td>
 <td>
-<code>timelines.enagagement.is_retweeted_without_quote<br>
+<code>timelines.engagement.is_retweeted_without_quote<br>
 timelines.engagement.is_block_clicked<br>
 timelines.engagement.is_clicked<br>
 timelines.engagement.is_dont_like<br>
@@ -1552,7 +1552,7 @@ These features aggregate values corresponding to a tweet.
 <tr>
 <td><code>tweet (real_time)</code></td>
 <td><code>
-timelines.enagagement.is_retweeted_without_quote<br>
+timelines.engagement.is_retweeted_without_quote<br>
 timelines.engagement.is_clicked<br>
 timelines.engagement.is_dont_like<br>
 timelines.engagement.is_dwelled<br>
@@ -1954,7 +1954,7 @@ recap.tweetfeature.match_ui_lang <br>
 recap.tweetfeature.mention_searcher <br>
 recap.tweetfeature.num_hashtags <br>
 recap.tweetfeature.num_mentions <br>
-recap.tweetfeature.prev_user_tweet_enagagement <br>
+recap.tweetfeature.prev_user_tweet_engagement <br>
 recap.tweetfeature.reply_other <br>
 recap.tweetfeature.reply_searcher <br>
 recap.tweetfeature.retweet_other <br>
@@ -2081,7 +2081,7 @@ in_reply_to_tweet.recap.tweetfeature.is_offensive <br>
 in_reply_to_tweet.recap.tweetfeature.is_reply <br>
 in_reply_to_tweet.recap.tweetfeature.is_sensitive <br>
 in_reply_to_tweet.recap.tweetfeature.num_mentions <br>
-in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_enagagement <br>
+in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_engagement <br>
 in_reply_to_tweet.recap.tweetfeature.unidirectiona_fav_count <br>
 in_reply_to_tweet.recap.tweetfeature.unidirectional_reply_count <br>
 in_reply_to_tweet.recap.tweetfeature.unidirectional_retweet_count <br>
--- a/projects/home/recap/data/config.py
+++ b/projects/home/recap/data/config.py
@@ -50,7 +50,7 @@ class DatasetConfig(base_config.BaseConfig):
    None, description="Number of shards to keep."
  )
  repeat_files: bool = pydantic.Field(
-    True, description="DEPRICATED. Files are repeated no matter what this is set to."
+    True, description="Deprecated. Files are repeated no matter what this is set to."
  )
  file_batch_size: pydantic.PositiveInt = pydantic.Field(16, description="File batch size")

--- a/projects/home/recap/data/dataset.py
+++ b/projects/home/recap/data/dataset.py
@@ -47,7 +47,7 @@ def to_batch(x, sparse_feature_names: Optional[List[str]] = None) -> RecapBatch:
  try:
    features_in, labels = x
  except ValueError:
-    # For Mode.INFERENCE, we do not expect to recieve labels as part of the input tuple
+    # For Mode.INFERENCE, we do not expect to receive labels as part of the input tuple
    features_in, labels = x, None

  sparse_features = keyed_jagged_tensor_from_tensors_dict({})
@@ -398,7 +398,7 @@ class RecapDataset(torch.utils.data.IterableDataset):
        )
    else:
      raise ValueError(
-        "Must specifiy either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
+        "Must specify either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
      )

    num_files = len(filenames)
--- a/projects/home/recap/data/tfe_parsing.py
+++ b/projects/home/recap/data/tfe_parsing.py
@@ -15,7 +15,7 @@ def create_tf_example_schema(
  data_config: recap_data_config.SegDenseSchema,
  segdense_schema,
 ):
-  """Generate schema for deseralizing tf.Example.
+  """Generate schema for deserializing tf.Example.

  Args:
    segdense_schema: List of dicts of segdense features (includes feature_name, dtype, length).
@@ -58,12 +58,12 @@ def create_tf_example_schema(

@functools.lru_cache(1)
 def make_mantissa_mask(mask_length: int) -> tf.Tensor:
-  """For experimentating with emulating bfloat16 or less precise types."""
+  """For experimenting with emulating bfloat16 or less precise types."""
  return tf.constant((1 << 32) - (1 << mask_length), dtype=tf.int32)


 def mask_mantissa(tensor: tf.Tensor, mask_length: int) -> tf.Tensor:
-  """For experimentating with emulating bfloat16 or less precise types."""
+  """For experimenting with emulating bfloat16 or less precise types."""
  mask: tf.Tensor = make_mantissa_mask(mask_length)
  return tf.bitcast(tf.bitwise.bitwise_and(tf.bitcast(tensor, tf.int32), mask), tensor.dtype)

--- a/projects/home/recap/model/config.py
+++ b/projects/home/recap/model/config.py
@@ -18,7 +18,7 @@ class DropoutConfig(base_config.BaseConfig):


 class LayerNormConfig(base_config.BaseConfig):
-  """Configruation for the layer normalization."""
+  """Configuration for the layer normalization."""

  epsilon: float = pydantic.Field(
    1e-3, description="Small float added to variance to avoid dividing by zero."
--- a/projects/twhin/data/edges.py
+++ b/projects/twhin/data/edges.py
@@ -96,7 +96,7 @@ class EdgesDataset(Dataset):

    Returns a KeyedJaggedTensor used to look up all embeddings.

-    Note: We treat the lhs and rhs as though they're separate lookups: `len(lenghts) == 2 * bsz * len(tables)`.
+    Note: We treat the lhs and rhs as though they're separate lookups: `len(lengths) == 2 * bsz * len(tables)`.
    This differs from the DLRM pattern where we have `len(lengths) = bsz * len(tables)`.

    For the example above: