Update speed_check(), add type hints.

- Print batch performance metrics only when idx is greater than zero, and report the current batch's size and throughput instead of accumulating and resetting a running example count.
- Raise a ValueError instead of a bare Exception when no imputation value is supported for a pyarrow type.
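
With this change, a metrics line renders like the following (numbers invented for illustration):

    step: 100, elapsed(s): 1.25, examples: 512, ex/s: 409.60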
Bruno Rodrigues Faria authored 2023-04-02 13:11:15 -03:00, committed by GitHub
parent 78c3235eee, commit 9029e9ee2e

"""Reader utilities."""
import itertools
import time
from typing import Optional
from tml.common.batch import DataclassBatch
from tml.ml_logging.torch_logging import logging
import pyarrow as pa
import torch
from tml.common.batch import DataclassBatch
from tml.ml_logging.torch_logging import logging
def roundrobin(*iterables):
  """Round robin through provided iterables, useful for simple load balancing.

  Adapted from https://docs.python.org/3/library/itertools.html.
  """
  num_active = len(iterables)
  nexts = itertools.cycle(iter(it).__next__ for it in iterables)
  while num_active:
    try:
      for _next in nexts:
        result = _next()
        yield result
    except StopIteration:
      # Remove the iterator we just exhausted from the cycle.
      num_active -= 1
      nexts = itertools.cycle(itertools.islice(nexts, num_active))
      logging.warning(f"Iterable exhausted, {num_active} iterables left.")
    except Exception as exc:
      logging.warning(f"Iterable raised exception {exc}, ignoring.")
      # continue
      raise
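
# Illustrative usage (not part of the original file): round-robin drains both
# iterables, logging a warning as each one is exhausted.
#   >>> list(roundrobin("AB", "xyz"))
#   ['A', 'x', 'B', 'y', 'z']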


def speed_check(data_loader, max_steps: int, frequency: int, peek: Optional[int]):
  prev = time.perf_counter()
  for idx, batch in enumerate(data_loader):
    if idx > max_steps:
      break
    if peek and idx % peek == 0:
      logging.info(f"Batch: {batch}")
    if idx % frequency == 0 and idx > 0:
      now = time.perf_counter()
      elapsed = now - prev
      examples_per_second = batch.batch_size / elapsed
      logging.info(
        f"step: {idx}, elapsed(s): {elapsed:.2f}, examples: {batch.batch_size}, "
        f"ex/s: {examples_per_second:.2f}"
      )
      prev = now
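
# Illustrative usage (the loader below is hypothetical): report throughput
# every 100 batches and log the contents of every 1000th batch.
#   speed_check(train_loader, max_steps=10_000, frequency=100, peek=1000)
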
def pa_to_torch(array: pa.array) -> torch.Tensor:
  return torch.from_numpy(array.to_numpy())


def create_default_pa_to_batch(schema) -> DataclassBatch:
  """ """
  _CustomBatch = DataclassBatch.from_schema("DefaultBatch", schema=schema)

  def get_imputation_value(pa_type):
    type_map = {
      pa.float64(): pa.scalar(0, type=pa.float64()),
      pa.int64(): pa.scalar(0, type=pa.int64()),
      pa.string(): pa.scalar("", type=pa.string()),
    }
    if pa_type not in type_map:
      raise ValueError(f"Imputation for type {pa_type} not supported.")
    return type_map[pa_type]

  def _impute(array: pa.array) -> pa.array:
    return array.fill_null(get_imputation_value(array.type))
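
# Illustrative check (assumes a small arrow array with no nulls):
#   >>> pa_to_torch(pa.array([1, 2, 3]))
#   tensor([1, 2, 3])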