diff --git a/common/checkpointing/snapshot.py b/common/checkpointing/snapshot.py
index 2703efd..c2b88c6 100644
--- a/common/checkpointing/snapshot.py
+++ b/common/checkpointing/snapshot.py
@@ -101,7 +101,7 @@ class Snapshot:
     weight_tensor,
   ) -> None:
     """Loads pretrained embedding from the snapshot to the model.
-    Utilise partial lodaing meachanism from torchsnapshot.
+    Utilise partial loading mechanism from torchsnapshot.
     Args:
       embedding_snapshot: Path to the snapshot containing pretrained embeddings (EBC).
       snapshot_emb_name: Name of the layer in the *snapshot* model, containing the EBC.
diff --git a/core/config/config_load.py b/core/config/config_load.py
index 709da41..e2fac34 100644
--- a/core/config/config_load.py
+++ b/core/config/config_load.py
@@ -11,7 +11,7 @@ def load_config_from_yaml(config_type: Type[BaseConfig], yaml_path: str):
   """Recommend method to load a config file (a yaml file) and parse it.
 
   Because we have a shared filesystem the recommended route to running jobs it put modified config
-  files with the desired parameters somewhere on the filesytem and run jobs pointing to them.
+  files with the desired parameters somewhere on the filesystem and run jobs pointing to them.
   """
 
   def _substitute(s):
diff --git a/core/custom_training_loop.py b/core/custom_training_loop.py
index 0241145..b4d240a 100644
--- a/core/custom_training_loop.py
+++ b/core/custom_training_loop.py
@@ -28,7 +28,7 @@ import torchmetrics as tm
 def get_new_iterator(iterable: Iterable):
   """
-  This obtain a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
+  This obtains a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
   getting a new iterator each N steps will avoid memory leak.
   To avoid the memory leak calling iter(iterable) should return a "fresh" iterator using a fresh
   (new instance of) tf.data.Iterator.
@@ -115,7 +115,7 @@ def train(
     dataset: data iterator for the training set
     evaluation_iterators: data iterators for the different evaluation sets
     scheduler: optional learning rate scheduler
-    output_transform_for_metrics: optional transformation functions to transorm the model
+    output_transform_for_metrics: optional transformation functions to transform the model
       output and labels into a format the metrics can understand
   """
diff --git a/core/debug_training_loop.py b/core/debug_training_loop.py
index 610eea9..c7a1129 100644
--- a/core/debug_training_loop.py
+++ b/core/debug_training_loop.py
@@ -1,6 +1,6 @@
 """This is a very limited feature training loop useful for interactive debugging.
 
-It is not intended for actual model tranining (it is not fast, doesn't compile the model).
+It is not intended for actual model training (it is not fast, doesn't compile the model).
 It does not support checkpointing.
 
 suggested use:
diff --git a/projects/home/recap/FEATURES.md b/projects/home/recap/FEATURES.md
index 2fa54ac..595908c 100644
--- a/projects/home/recap/FEATURES.md
+++ b/projects/home/recap/FEATURES.md
@@ -73,7 +73,7 @@ author (real_time)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
 timelines.engagement.is_dwelled
@@ -112,7 +112,7 @@ original_author (real_time)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
 timelines.engagement.is_dwelled
@@ -544,7 +544,7 @@ user (real_time)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
 timelines.engagement.is_dwelled
@@ -585,7 +585,7 @@ user (48h_real_time_v5)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
 timelines.engagement.is_dwelled
@@ -1422,7 +1422,7 @@ topic (real_time)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
 timelines.engagement.is_dwelled
@@ -1460,7 +1460,7 @@ topic (24_hour_real_time)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_block_clicked
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
@@ -1552,7 +1552,7 @@ These features aggregate values corresponding to a tweet.
 tweet (real_time)
-timelines.enagagement.is_retweeted_without_quote
+timelines.engagement.is_retweeted_without_quote
 timelines.engagement.is_clicked
 timelines.engagement.is_dont_like
 timelines.engagement.is_dwelled
@@ -1954,7 +1954,7 @@ recap.tweetfeature.match_ui_lang
 recap.tweetfeature.mention_searcher
 recap.tweetfeature.num_hashtags
 recap.tweetfeature.num_mentions
-recap.tweetfeature.prev_user_tweet_enagagement
+recap.tweetfeature.prev_user_tweet_engagement
 recap.tweetfeature.reply_other
 recap.tweetfeature.reply_searcher
 recap.tweetfeature.retweet_other
@@ -2081,7 +2081,7 @@ in_reply_to_tweet.recap.tweetfeature.is_offensive
 in_reply_to_tweet.recap.tweetfeature.is_reply
 in_reply_to_tweet.recap.tweetfeature.is_sensitive
 in_reply_to_tweet.recap.tweetfeature.num_mentions
-in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_enagagement
+in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_engagement
 in_reply_to_tweet.recap.tweetfeature.unidirectiona_fav_count
 in_reply_to_tweet.recap.tweetfeature.unidirectional_reply_count
 in_reply_to_tweet.recap.tweetfeature.unidirectional_retweet_count
diff --git a/projects/home/recap/data/config.py b/projects/home/recap/data/config.py
index 27ef3ed..81079a9 100644
--- a/projects/home/recap/data/config.py
+++ b/projects/home/recap/data/config.py
@@ -50,7 +50,7 @@ class DatasetConfig(base_config.BaseConfig):
     None, description="Number of shards to keep."
   )
   repeat_files: bool = pydantic.Field(
-    True, description="DEPRICATED. Files are repeated no matter what this is set to."
+    True, description="Deprecated. Files are repeated no matter what this is set to."
   )
   file_batch_size: pydantic.PositiveInt = pydantic.Field(16, description="File batch size")
diff --git a/projects/home/recap/data/dataset.py b/projects/home/recap/data/dataset.py
index 3478c68..16d9d25 100644
--- a/projects/home/recap/data/dataset.py
+++ b/projects/home/recap/data/dataset.py
@@ -47,7 +47,7 @@ def to_batch(x, sparse_feature_names: Optional[List[str]] = None) -> RecapBatch:
   try:
     features_in, labels = x
   except ValueError:
-    # For Mode.INFERENCE, we do not expect to recieve labels as part of the input tuple
+    # For Mode.INFERENCE, we do not expect to receive labels as part of the input tuple
     features_in, labels = x, None
 
   sparse_features = keyed_jagged_tensor_from_tensors_dict({})
@@ -398,7 +398,7 @@ class RecapDataset(torch.utils.data.IterableDataset):
       )
     else:
       raise ValueError(
-        "Must specifiy either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
+        "Must specify either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
       )
 
     num_files = len(filenames)
diff --git a/projects/home/recap/data/tfe_parsing.py b/projects/home/recap/data/tfe_parsing.py
index f597746..07770fd 100644
--- a/projects/home/recap/data/tfe_parsing.py
+++ b/projects/home/recap/data/tfe_parsing.py
@@ -15,7 +15,7 @@ def create_tf_example_schema(
   data_config: recap_data_config.SegDenseSchema,
   segdense_schema,
 ):
-  """Generate schema for deseralizing tf.Example.
+  """Generate schema for deserializing tf.Example.
 
   Args:
     segdense_schema: List of dicts of segdense features (includes feature_name, dtype, length).
@@ -58,12 +58,12 @@ create_tf_example_schema(
 @functools.lru_cache(1)
 def make_mantissa_mask(mask_length: int) -> tf.Tensor:
-  """For experimentating with emulating bfloat16 or less precise types."""
+  """For experimenting with emulating bfloat16 or less precise types."""
   return tf.constant((1 << 32) - (1 << mask_length), dtype=tf.int32)
 
 
 def mask_mantissa(tensor: tf.Tensor, mask_length: int) -> tf.Tensor:
-  """For experimentating with emulating bfloat16 or less precise types."""
+  """For experimenting with emulating bfloat16 or less precise types."""
   mask: tf.Tensor = make_mantissa_mask(mask_length)
   return tf.bitcast(tf.bitwise.bitwise_and(tf.bitcast(tensor, tf.int32), mask), tensor.dtype)
diff --git a/projects/home/recap/model/config.py b/projects/home/recap/model/config.py
index 47d0640..020f737 100644
--- a/projects/home/recap/model/config.py
+++ b/projects/home/recap/model/config.py
@@ -18,7 +18,7 @@ class DropoutConfig(base_config.BaseConfig):
 
 class LayerNormConfig(base_config.BaseConfig):
-  """Configruation for the layer normalization."""
+  """Configuration for the layer normalization."""
 
   epsilon: float = pydantic.Field(
     1e-3, description="Small float added to variance to avoid dividing by zero."
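
Note on the tfe_parsing.py hunk above: the two helpers it touches emulate lower-precision floats by zeroing low-order mantissa bits. A minimal sketch of the trick, assuming mask_length=16 (which keeps the sign bit, the 8 exponent bits, and the top 7 mantissa bits that bfloat16 retains); the input value is illustrative, not from the repo:

import tensorflow as tf

mask_length = 16
# Same bit pattern as the repo's (1 << 32) - (1 << mask_length), i.e. 0xFFFF0000,
# written in signed form so the Python int fits in an int32.
mask = tf.constant(-(1 << mask_length), dtype=tf.int32)

x = tf.constant([3.1415927], dtype=tf.float32)
# Reinterpret the float bits as int32, clear the low mantissa bits, reinterpret back.
truncated = tf.bitcast(tf.bitwise.bitwise_and(tf.bitcast(x, tf.int32), mask), tf.float32)
print(truncated.numpy()[0])  # 3.140625: bfloat16-level precision, dtype still float32

The tensor stays float32 end to end; only its effective precision drops, which is why the docstrings frame these helpers as experimentation tools.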
diff --git a/projects/twhin/data/edges.py b/projects/twhin/data/edges.py
index f7864b1..42aebfb 100644
--- a/projects/twhin/data/edges.py
+++ b/projects/twhin/data/edges.py
@@ -96,7 +96,7 @@ class EdgesDataset(Dataset):
 
     Returns a KeyedJaggedTensor used to look up all embeddings.
 
-    Note: We treat the lhs and rhs as though they're separate lookups: `len(lenghts) == 2 * bsz * len(tables)`.
+    Note: We treat the lhs and rhs as though they're separate lookups: `len(lengths) == 2 * bsz * len(tables)`.
     This differs from the DLRM pattern where we have `len(lengths) = bsz * len(tables)`.
 
     For the example above:
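
The note in this last hunk is easiest to see with a toy KeyedJaggedTensor. A hypothetical sketch (the table names, ids, and batch size below are invented for illustration, not taken from EdgesDataset):

import torch
from torchrec.sparse.jagged_tensor import KeyedJaggedTensor

bsz = 2
tables = ["user", "author"]

# lhs and rhs are treated as separate lookups, so the effective batch doubles:
# lengths holds 2 * bsz * len(tables) = 8 entries instead of DLRM's bsz * len(tables) = 4.
kjt = KeyedJaggedTensor(
  keys=tables,
  values=torch.tensor([10, 11, 20, 21]),  # illustrative embedding ids, one per nonzero length
  lengths=torch.tensor([1, 0, 1, 0, 0, 1, 0, 1]),
)
assert kjt.lengths().numel() == 2 * bsz * len(tables)

Under this toy layout each table's slice of lengths covers 2 * bsz slots rather than bsz, which is exactly where the extra factor of 2 in the docstring comes from.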