mirror of
https://github.com/twitter/the-algorithm-ml.git
synced 2025-01-24 21:01:11 +01:00
Fix additional typos in various comments/docs
This commit is contained in:
parent
974d6458af
commit
cb1ff279f2
@ -101,7 +101,7 @@ class Snapshot:
|
||||
weight_tensor,
|
||||
) -> None:
|
||||
"""Loads pretrained embedding from the snapshot to the model.
|
||||
Utilise partial lodaing meachanism from torchsnapshot.
|
||||
Utilise partial loading mechanism from torchsnapshot.
|
||||
Args:
|
||||
embedding_snapshot: Path to the snapshot containing pretrained embeddings (EBC).
|
||||
snapshot_emb_name: Name of the layer in the *snapshot* model, containing the EBC.
|
||||
|
@ -11,7 +11,7 @@ def load_config_from_yaml(config_type: Type[BaseConfig], yaml_path: str):
|
||||
"""Recommended method to load a config file (a yaml file) and parse it.
|
||||
|
||||
Because we have a shared filesystem the recommended route to running jobs is to put modified config
|
||||
files with the desired parameters somewhere on the filesytem and run jobs pointing to them.
|
||||
files with the desired parameters somewhere on the filesystem and run jobs pointing to them.
|
||||
"""
|
||||
|
||||
def _substitute(s):
|
||||
|
@ -28,7 +28,7 @@ import torchmetrics as tm
|
||||
|
||||
def get_new_iterator(iterable: Iterable):
|
||||
"""
|
||||
This obtain a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
|
||||
This obtains a new iterator from the iterable. If the iterable uses tf.data.Dataset internally,
|
||||
getting a new iterator each N steps will avoid memory leak. To avoid the memory leak
|
||||
calling iter(iterable) should return a "fresh" iterator using a fresh
|
||||
(new instance of) tf.data.Iterator.
|
||||
@ -115,7 +115,7 @@ def train(
|
||||
dataset: data iterator for the training set
|
||||
evaluation_iterators: data iterators for the different evaluation sets
|
||||
scheduler: optional learning rate scheduler
|
||||
output_transform_for_metrics: optional transformation functions to transorm the model
|
||||
output_transform_for_metrics: optional transformation functions to transform the model
|
||||
output and labels into a format the metrics can understand
|
||||
"""
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
"""This is a very limited feature training loop useful for interactive debugging.
|
||||
|
||||
It is not intended for actual model tranining (it is not fast, doesn't compile the model).
|
||||
It is not intended for actual model training (it is not fast, doesn't compile the model).
|
||||
It does not support checkpointing.
|
||||
|
||||
suggested use:
|
||||
|
@ -73,7 +73,7 @@ author (real_time)
|
||||
</td>
|
||||
<td>
|
||||
<code>
|
||||
timelines.enagagement.is_retweeted_without_quote <br>
|
||||
timelines.engagement.is_retweeted_without_quote <br>
|
||||
timelines.engagement.is_clicked <br>
|
||||
timelines.engagement.is_dont_like <br>
|
||||
timelines.engagement.is_dwelled <br>
|
||||
@ -112,7 +112,7 @@ original_author (real_time)
|
||||
</td>
|
||||
<td>
|
||||
<code>
|
||||
timelines.enagagement.is_retweeted_without_quote <br>
|
||||
timelines.engagement.is_retweeted_without_quote <br>
|
||||
timelines.engagement.is_clicked <br>
|
||||
timelines.engagement.is_dont_like <br>
|
||||
timelines.engagement.is_dwelled <br>
|
||||
@ -544,7 +544,7 @@ user (real_time)
|
||||
</td>
|
||||
<td>
|
||||
<code>
|
||||
timelines.enagagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_clicked<br>
|
||||
timelines.engagement.is_dont_like<br>
|
||||
timelines.engagement.is_dwelled<br>
|
||||
@ -585,7 +585,7 @@ user (48h_real_time_v5)
|
||||
</td>
|
||||
<td>
|
||||
<code>
|
||||
timelines.enagagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_clicked<br>
|
||||
timelines.engagement.is_dont_like<br>
|
||||
timelines.engagement.is_dwelled<br>
|
||||
@ -1422,7 +1422,7 @@ topic (real_time)
|
||||
</td>
|
||||
<td>
|
||||
<code>
|
||||
timelines.enagagement.is_retweeted_without_quote <br>
|
||||
timelines.engagement.is_retweeted_without_quote <br>
|
||||
timelines.engagement.is_clicked <br>
|
||||
timelines.engagement.is_dont_like <br>
|
||||
timelines.engagement.is_dwelled <br>
|
||||
@ -1460,7 +1460,7 @@ topic (24_hour_real_time)
|
||||
</code>
|
||||
</td>
|
||||
<td>
|
||||
<code>timelines.enagagement.is_retweeted_without_quote<br>
|
||||
<code>timelines.engagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_block_clicked<br>
|
||||
timelines.engagement.is_clicked<br>
|
||||
timelines.engagement.is_dont_like<br>
|
||||
@ -1552,7 +1552,7 @@ These features aggregate values corresponding to a tweet.
|
||||
<tr>
|
||||
<td><code>tweet (real_time)</code></td>
|
||||
<td><code>
|
||||
timelines.enagagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_retweeted_without_quote<br>
|
||||
timelines.engagement.is_clicked<br>
|
||||
timelines.engagement.is_dont_like<br>
|
||||
timelines.engagement.is_dwelled<br>
|
||||
@ -1954,7 +1954,7 @@ recap.tweetfeature.match_ui_lang <br>
|
||||
recap.tweetfeature.mention_searcher <br>
|
||||
recap.tweetfeature.num_hashtags <br>
|
||||
recap.tweetfeature.num_mentions <br>
|
||||
recap.tweetfeature.prev_user_tweet_enagagement <br>
|
||||
recap.tweetfeature.prev_user_tweet_engagement <br>
|
||||
recap.tweetfeature.reply_other <br>
|
||||
recap.tweetfeature.reply_searcher <br>
|
||||
recap.tweetfeature.retweet_other <br>
|
||||
@ -2081,7 +2081,7 @@ in_reply_to_tweet.recap.tweetfeature.is_offensive <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.is_reply <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.is_sensitive <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.num_mentions <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_enagagement <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.prev_user_tweet_engagement <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.unidirectiona_fav_count <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.unidirectional_reply_count <br>
|
||||
in_reply_to_tweet.recap.tweetfeature.unidirectional_retweet_count <br>
|
||||
|
@ -50,7 +50,7 @@ class DatasetConfig(base_config.BaseConfig):
|
||||
None, description="Number of shards to keep."
|
||||
)
|
||||
repeat_files: bool = pydantic.Field(
|
||||
True, description="DEPRICATED. Files are repeated no matter what this is set to."
|
||||
True, description="Deprecated. Files are repeated no matter what this is set to."
|
||||
)
|
||||
file_batch_size: pydantic.PositiveInt = pydantic.Field(16, description="File batch size")
|
||||
|
||||
|
@ -47,7 +47,7 @@ def to_batch(x, sparse_feature_names: Optional[List[str]] = None) -> RecapBatch:
|
||||
try:
|
||||
features_in, labels = x
|
||||
except ValueError:
|
||||
# For Mode.INFERENCE, we do not expect to recieve labels as part of the input tuple
|
||||
# For Mode.INFERENCE, we do not expect to receive labels as part of the input tuple
|
||||
features_in, labels = x, None
|
||||
|
||||
sparse_features = keyed_jagged_tensor_from_tensors_dict({})
|
||||
@ -398,7 +398,7 @@ class RecapDataset(torch.utils.data.IterableDataset):
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Must specifiy either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
|
||||
"Must specify either `inputs`, `explicit_datetime_inputs`, or `explicit_date_inputs` in data_config"
|
||||
)
|
||||
|
||||
num_files = len(filenames)
|
||||
|
@ -15,7 +15,7 @@ def create_tf_example_schema(
|
||||
data_config: recap_data_config.SegDenseSchema,
|
||||
segdense_schema,
|
||||
):
|
||||
"""Generate schema for deseralizing tf.Example.
|
||||
"""Generate schema for deserializing tf.Example.
|
||||
|
||||
Args:
|
||||
segdense_schema: List of dicts of segdense features (includes feature_name, dtype, length).
|
||||
@ -58,12 +58,12 @@ def create_tf_example_schema(
|
||||
|
||||
@functools.lru_cache(1)
|
||||
def make_mantissa_mask(mask_length: int) -> tf.Tensor:
|
||||
"""For experimentating with emulating bfloat16 or less precise types."""
|
||||
"""For experimenting with emulating bfloat16 or less precise types."""
|
||||
return tf.constant((1 << 32) - (1 << mask_length), dtype=tf.int32)
|
||||
|
||||
|
||||
def mask_mantissa(tensor: tf.Tensor, mask_length: int) -> tf.Tensor:
|
||||
"""For experimentating with emulating bfloat16 or less precise types."""
|
||||
"""For experimenting with emulating bfloat16 or less precise types."""
|
||||
mask: tf.Tensor = make_mantissa_mask(mask_length)
|
||||
return tf.bitcast(tf.bitwise.bitwise_and(tf.bitcast(tensor, tf.int32), mask), tensor.dtype)
|
||||
|
||||
|
@ -18,7 +18,7 @@ class DropoutConfig(base_config.BaseConfig):
|
||||
|
||||
|
||||
class LayerNormConfig(base_config.BaseConfig):
|
||||
"""Configruation for the layer normalization."""
|
||||
"""Configuration for the layer normalization."""
|
||||
|
||||
epsilon: float = pydantic.Field(
|
||||
1e-3, description="Small float added to variance to avoid dividing by zero."
|
||||
|
@ -96,7 +96,7 @@ class EdgesDataset(Dataset):
|
||||
|
||||
Returns a KeyedJaggedTensor used to look up all embeddings.
|
||||
|
||||
Note: We treat the lhs and rhs as though they're separate lookups: `len(lenghts) == 2 * bsz * len(tables)`.
|
||||
Note: We treat the lhs and rhs as though they're separate lookups: `len(lengths) == 2 * bsz * len(tables)`.
|
||||
This differs from the DLRM pattern where we have `len(lengths) == bsz * len(tables)`.
|
||||
|
||||
For the example above:
|
||||
|
Loading…
x
Reference in New Issue
Block a user