Fix additional typos in various comments/docs

This commit is contained in:
Brian Jordan 2023-03-31 15:25:04 -04:00
parent 974d6458af
commit cb1ff279f2
10 changed files with 22 additions and 22 deletions

View File

@ -101,7 +101,7 @@ class Snapshot:
weight_tensor, weight_tensor,
) -> None: ) -> None:
"""Loads pretrained embedding from the snapshot to the model. """Loads pretrained embedding from the snapshot to the model.
Utilise partial lodaing meachanism from torchsnapshot. Utilise partial loading mechanism from torchsnapshot.
Args: Args:
embedding_snapshot: Path to the snapshot containing pretrained embeddings (EBC). embedding_snapshot: Path to the snapshot containing pretrained embeddings (EBC).
snapshot_emb_name: Name of the layer in the *snapshot* model, containing the EBC. snapshot_emb_name: Name of the layer in the *snapshot* model, containing the EBC.

View File

@ -11,7 +11,7 @@ def load_config_from_yaml(config_type: Type[BaseConfig], yaml_path: str):
"""Recommended method to load a config file (a yaml file) and parse it. """Recommended method to load a config file (a yaml file) and parse it.
Because we have a shared filesystem the recommended route to running jobs is to put modified config Because we have a shared filesystem the recommended route to running jobs is to put modified config
files with the desired parameters somewhere on the filesytem and run jobs pointing to them. files with the desired parameters somewhere on the filesystem and run jobs pointing to them.
""" """
def _substitute(s): def _substitute(s):

View File

@ -28,7 +28,7 @@ import torchmetrics as tm
def get_new_iterator(iterable: Iterable): def get_new_iterator(iterable: Iterable):
""" """
This obtain a new iterator from the iterable. If the iterable uses tf.data.Dataset internally, This obtains a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
getting a new iterator every N steps will avoid a memory leak. To avoid the memory leak getting a new iterator every N steps will avoid a memory leak. To avoid the memory leak
calling iter(iterable) should return a "fresh" iterator using a fresh calling iter(iterable) should return a "fresh" iterator using a fresh
(new instance of) tf.data.Iterator. (new instance of) tf.data.Iterator.
@ -115,7 +115,7 @@ def train(
dataset: data iterator for the training set dataset: data iterator for the training set
evaluation_iterators: data iterators for the different evaluation sets evaluation_iterators: data iterators for the different evaluation sets
scheduler: optional learning rate scheduler scheduler: optional learning rate scheduler
output_transform_for_metrics: optional transformation functions to transorm the model output_transform_for_metrics: optional transformation functions to transform the model
output and labels into a format the metrics can understand output and labels into a format the metrics can understand
""" """

View File

@ -1,6 +1,6 @@
"""This is a very limited feature training loop useful for interactive debugging. """This is a very limited feature training loop useful for interactive debugging.
It is not intended for actual model tranining (it is not fast, doesn't compile the model). It is not intended for actual model training (it is not fast, doesn't compile the model).
It does not support checkpointing. It does not support checkpointing.
suggested use: suggested use:

View File

@ -73,7 +73,7 @@ author (real_time)
</td> </td>
<td> <td>
<code> <code>
timelines.enagagement.is_retweeted_without_quote <br> timelines.engagement.is_retweeted_without_quote <br>
timelines.engagement.is_clicked <br> timelines.engagement.is_clicked <br>
timelines.engagement.is_dont_like <br> timelines.engagement.is_dont_like <br>
timelines.engagement.is_dwelled <br> timelines.engagement.is_dwelled <br>
@ -112,7 +112,7 @@ original_author (real_time)
</td> </td>
<td> <td>
<code> <code>
timelines.enagagement.is_retweeted_without_quote <br> timelines.engagement.is_retweeted_without_quote <br>
timelines.engagement.is_clicked <br> timelines.engagement.is_clicked <br>
timelines.engagement.is_dont_like <br> timelines.engagement.is_dont_like <br>
timelines.engagement.is_dwelled <br> timelines.engagement.is_dwelled <br>
@ -544,7 +544,7 @@ user (real_time)
</td> </td>
<td> <td>
<code> <code>
timelines.enagagement.is_retweeted_without_quote<br> timelines.engagement.is_retweeted_without_quote<br>
timelines.engagement.is_clicked<br> timelines.engagement.is_clicked<br>
timelines.engagement.is_dont_like<br> timelines.engagement.is_dont_like<br>
timelines.engagement.is_dwelled<br> timelines.engagement.is_dwelled<br>
@ -585,7 +585,7 @@ user (48h_real_time_v5)
</td> </td>
<td> <td>
<code> <code>
timelines.enagagement.is_retweeted_without_quote<br> timelines.engagement.is_retweeted_without_quote<br>
timelines.engagement.is_clicked<br> timelines.engagement.is_clicked<br>
timelines.engagement.is_dont_like<br> timelines.engagement.is_dont_like<br>
timelines.engagement.is_dwelled<br> timelines.engagement.is_dwelled<br>
@ -1422,7 +1422,7 @@ topic (real_time)
</td> </td>
<td> <td>
<code> <code>
timelines.enagagement.is_retweeted_without_quote <br> timelines.engagement.is_retweeted_without_quote <br>
timelines.engagement.is_clicked <br> timelines.engagement.is_clicked <br>
timelines.engagement.is_dont_like <br> timelines.engagement.is_dont_like <br>
timelines.engagement.is_dwelled <br> timelines.engagement.is_dwelled <br>
@ -1460,7 +1460,7 @@ topic (24_hour_real_time)
</code> </code>
</td> </td>
<td> <td>
<code>timelines.enagagement.is_retweeted_without_quote<br> <code>timelines.engagement.is_retweeted_without_quote<br>
timelines.engagement.is_block_clicked<br> timelines.engagement.is_block_clicked<br>
timelines.engagement.is_clicked<br> timelines.engagement.is_clicked<br>
timelines.engagement.is_dont_like<br> timelines.engagement.is_dont_like<br>
@ -1552,7 +1552,7 @@ These features aggregate values corresponding to a tweet.
<tr> <tr>
<td><code>tweet (real_time)</code></td> <td><code>tweet (real_time)</code></td>
<td><code> <td><code>
timelines.enagagement.is_retweeted_without_quote<br> timelines.engagement.is_retweeted_without_quote<br>
timelines.engagement.is_clicked<br> timelines.engagement.is_clicked<br>
timelines.engagement.is_dont_like<br> timelines.engagement.is_dont_like<br>
timelines.engagement.is_dwelled<br> timelines.engagement.is_dwelled<br>
@ -1954,7 +1954,7 @@ recap.tweetfeature.match_ui_lang <br>
recap.tweetfeature.mention_searcher <br> recap.tweetfeature.mention_searcher <br>
recap.tweetfeature.num_hashtags <br> recap.tweetfeature.num_hashtags <br>
recap.tweetfeature.num_mentions <br> recap.tweetfeature.num_mentions <br>
recap.tweetfeature.prev_user_tweet_enagagement <br> recap.tweetfeature.prev_user_tweet_engagement <br>
recap.tweetfeature.reply_other <br> recap.tweetfeature.reply_other <br>
recap.tweetfeature.reply_searcher <br> recap.tweetfeature.reply_searcher <br>
recap.tweetfeature.retweet_other <br> recap.tweetfeature.retweet_other <br>
@ -2081,7 +2081,7 @@ in_reply_to_tweet.recap.tweetfeature.is_offensive <br>
in_reply_to_tweet.recap.tweetfeature.is_reply <br> in_reply_to_tweet.recap.tweetfeature.is_reply <br>
in_reply_to_tweet.recap.tweetfeature.is_sensitive <br> in_reply_to_tweet.recap.tweetfeature.is_sensitive <br>
in_reply_to_tweet.recap.tweetfeature.num_mentions <br> in_reply_to_tweet.recap.tweetfeature.num_mentions <br>
in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_enagagement <br> in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_engagement <br>
in_reply_to_tweet.recap.tweetfeature.unidirectiona_fav_count <br> in_reply_to_tweet.recap.tweetfeature.unidirectiona_fav_count <br>
in_reply_to_tweet.recap.tweetfeature.unidirectional_reply_count <br> in_reply_to_tweet.recap.tweetfeature.unidirectional_reply_count <br>
in_reply_to_tweet.recap.tweetfeature.unidirectional_retweet_count <br> in_reply_to_tweet.recap.tweetfeature.unidirectional_retweet_count <br>

View File

@ -50,7 +50,7 @@ class DatasetConfig(base_config.BaseConfig):
None, description="Number of shards to keep." None, description="Number of shards to keep."
) )
repeat_files: bool = pydantic.Field( repeat_files: bool = pydantic.Field(
True, description="DEPRICATED. Files are repeated no matter what this is set to." True, description="Deprecated. Files are repeated no matter what this is set to."
) )
file_batch_size: pydantic.PositiveInt = pydantic.Field(16, description="File batch size") file_batch_size: pydantic.PositiveInt = pydantic.Field(16, description="File batch size")

View File

@ -47,7 +47,7 @@ def to_batch(x, sparse_feature_names: Optional[List[str]] = None) -> RecapBatch:
try: try:
features_in, labels = x features_in, labels = x
except ValueError: except ValueError:
# For Mode.INFERENCE, we do not expect to recieve labels as part of the input tuple # For Mode.INFERENCE, we do not expect to receive labels as part of the input tuple
features_in, labels = x, None features_in, labels = x, None
sparse_features = keyed_jagged_tensor_from_tensors_dict({}) sparse_features = keyed_jagged_tensor_from_tensors_dict({})
@ -398,7 +398,7 @@ class RecapDataset(torch.utils.data.IterableDataset):
) )
else: else:
raise ValueError( raise ValueError(
"Must specifiy either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config" "Must specify either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
) )
num_files = len(filenames) num_files = len(filenames)

View File

@ -15,7 +15,7 @@ def create_tf_example_schema(
data_config: recap_data_config.SegDenseSchema, data_config: recap_data_config.SegDenseSchema,
segdense_schema, segdense_schema,
): ):
"""Generate schema for deseralizing tf.Example. """Generate schema for deserializing tf.Example.
Args: Args:
segdense_schema: List of dicts of segdense features (includes feature_name, dtype, length). segdense_schema: List of dicts of segdense features (includes feature_name, dtype, length).
@ -58,12 +58,12 @@ def create_tf_example_schema(
@functools.lru_cache(1) @functools.lru_cache(1)
def make_mantissa_mask(mask_length: int) -> tf.Tensor: def make_mantissa_mask(mask_length: int) -> tf.Tensor:
"""For experimentating with emulating bfloat16 or less precise types.""" """For experimenting with emulating bfloat16 or less precise types."""
return tf.constant((1 << 32) - (1 << mask_length), dtype=tf.int32) return tf.constant((1 << 32) - (1 << mask_length), dtype=tf.int32)
def mask_mantissa(tensor: tf.Tensor, mask_length: int) -> tf.Tensor: def mask_mantissa(tensor: tf.Tensor, mask_length: int) -> tf.Tensor:
"""For experimentating with emulating bfloat16 or less precise types.""" """For experimenting with emulating bfloat16 or less precise types."""
mask: tf.Tensor = make_mantissa_mask(mask_length) mask: tf.Tensor = make_mantissa_mask(mask_length)
return tf.bitcast(tf.bitwise.bitwise_and(tf.bitcast(tensor, tf.int32), mask), tensor.dtype) return tf.bitcast(tf.bitwise.bitwise_and(tf.bitcast(tensor, tf.int32), mask), tensor.dtype)

View File

@ -18,7 +18,7 @@ class DropoutConfig(base_config.BaseConfig):
class LayerNormConfig(base_config.BaseConfig): class LayerNormConfig(base_config.BaseConfig):
"""Configruation for the layer normalization.""" """Configuration for the layer normalization."""
epsilon: float = pydantic.Field( epsilon: float = pydantic.Field(
1e-3, description="Small float added to variance to avoid dividing by zero." 1e-3, description="Small float added to variance to avoid dividing by zero."

View File

@ -96,7 +96,7 @@ class EdgesDataset(Dataset):
Returns a KeyedJaggedTensor used to look up all embeddings. Returns a KeyedJaggedTensor used to look up all embeddings.
Note: We treat the lhs and rhs as though they're separate lookups: `len(lenghts) == 2 * bsz * len(tables)`. Note: We treat the lhs and rhs as though they're separate lookups: `len(lengths) == 2 * bsz * len(tables)`.
This differs from the DLRM pattern where we have `len(lengths) == bsz * len(tables)`. This differs from the DLRM pattern where we have `len(lengths) == bsz * len(tables)`.
For the example above: For the example above: