// the-algorithm/navi/navi/proto/tensorflow/core/framework/dataset_options.proto

syntax = "proto3";

package tensorflow.data;

import "tensorflow/core/framework/model.proto";

option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/dataset_options_go_proto";

// Represents the type of auto-sharding we enable.
//
// AUTO is the zero value, so it is what a reader sees when a field of this
// type is left unset.
enum AutoShardPolicy {
  // AUTO: Attempts FILE-based sharding, falling back to DATA-based sharding.
  AUTO = 0;
  // FILE: Shards by input files (i.e. each worker will get a set of files to
  // process). When this option is selected, make sure that there are at least
  // as many files as workers. If there are fewer input files than workers, a
  // runtime error will be raised.
  FILE = 1;
  // DATA: Shards by elements produced by the dataset. Each worker will process
  // the whole dataset and discard the portion that is not for itself. Note that
  // for this mode to correctly partition the dataset elements, the dataset
  // needs to produce elements in a deterministic order.
  DATA = 2;
  // HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated
  // as a placeholder to replace with `shard(num_workers, worker_index)`.
  HINT = 3;
  // OFF: No sharding will be performed.
  // NOTE: this is the only negative value in the enum; the number is part of
  // the wire format and must not be changed.
  OFF = -1;
}

// Autotuning settings for a dataset.
//
// Every field is wrapped in its own single-member oneof (`optional_*`), which
// gives the field explicit presence: an explicitly set value can be told apart
// from an unset one.
//
// next: 5 (the next unused field number)
message AutotuneOptions {
  // Whether to automatically tune performance knobs.
  oneof optional_enabled {
    bool enabled = 1;
  }
  // When autotuning is enabled (through autotune), determines the CPU budget to
  // use. Values greater than the number of schedulable CPU cores are allowed
  // but may result in CPU contention.
  oneof optional_cpu_budget {
    int32 cpu_budget = 2;
  }
  // When autotuning is enabled (through autotune), determines the RAM budget to
  // use. Values greater than the available RAM in bytes may result in OOM. If
  // 0, defaults to half of the available RAM in bytes.
  oneof optional_ram_budget {
    int64 ram_budget = 3;
  }

  // When autotuning is enabled (through autotune), determines the algorithm to
  // use. If not explicitly set by user, autotuning will follow HILL_CLIMB
  // algorithm but has more flexibility to tune parameters more aggressively,
  // in which case the behavior is implementation specific and may change over
  // time.
  //
  // `model.AutotuneAlgorithm` is not declared in this file; presumably it
  // comes from the imported tensorflow/core/framework/model.proto.
  oneof optional_autotune_algorithm {
    model.AutotuneAlgorithm autotune_algorithm = 4;
  }
}

// Controls how much effort is spent computing a dataset's cardinality.
//
// next: 2 (the next unused field number)
message CardinalityOptions {
  // How much work Cardinality() is allowed to do before giving up and
  // returning UNKNOWN_CARDINALITY.
  enum ComputeLevel {
    // Zero value; this is what `compute_level` reads as when it is unset.
    // NOTE(review): which effort level is applied in this case is not stated
    // in this file -- confirm against the implementation.
    CARDINALITY_COMPUTE_UNSPECIFIED = 0;
    // Cardinality will only be computed if it can be determined in a cheap
    // manner (i.e. without reading from file sources). If the cardinality would
    // be nontrivial to compute, Cardinality() will return UNKNOWN_CARDINALITY.
    CARDINALITY_COMPUTE_LOW = 1;
    // Moderate effort will be made to determine cardinality, such as reading
    // index data from source files. If significant work is needed to compute
    // cardinality (e.g. reading entire source file contents or executing user
    // defined functions), Cardinality() will return UNKNOWN_CARDINALITY.
    CARDINALITY_COMPUTE_MODERATE = 2;
  }
  // The requested effort level. Unlike most option fields in this file it is
  // not wrapped in a oneof, so an unset value and an explicit
  // CARDINALITY_COMPUTE_UNSPECIFIED are indistinguishable.
  ComputeLevel compute_level = 1;
}

// Options describing how a dataset is distributed across workers and devices.
//
// next: 3 (the next unused field number)
message DistributeOptions {
  // The auto-sharding policy to apply. This is a plain proto3 enum field, so
  // when it is unset it reads as the zero value, AUTO.
  AutoShardPolicy auto_shard_policy = 1;
  // The number of devices attached to this input pipeline.
  oneof optional_num_devices {
    int32 num_devices = 2;
  }
}

// Toggles for the individual graph optimizations applied to a dataset.
//
// Each toggle is a bool wrapped in its own single-member oneof
// (`optional_*`), so that an explicit `false` can be told apart from an unset
// value. Field ids of deleted toggles are kept `reserved` so they cannot be
// reused.
//
// next: 18 (the next unused field number)
message OptimizationOptions {
  // Whether to apply default graph optimizations. If False, only graph
  // optimizations that have been explicitly enabled will be applied.
  oneof optional_apply_default_optimizations {
    bool apply_default_optimizations = 1;
  }
  // Field ids 2 through 5 are reserved and must not be reused.
  reserved 2;
  reserved 3;
  reserved 4;
  reserved 5;
  // Whether to fuse filter transformations.
  oneof optional_filter_fusion {
    bool filter_fusion = 6;
  }
  // NOTE: field id 7 deleted in June 2021.
  reserved 7;
  // NOTE: field id 8 deleted in June 2021.
  reserved 8;
  // Whether to fuse map and batch transformations.
  oneof optional_map_and_batch_fusion {
    bool map_and_batch_fusion = 9;
  }
  // Whether to fuse map and filter transformations.
  oneof optional_map_and_filter_fusion {
    bool map_and_filter_fusion = 10;
  }
  // Whether to fuse map transformations.
  oneof optional_map_fusion {
    bool map_fusion = 11;
  }
  // Whether to parallelize stateless map transformations.
  oneof optional_map_parallelization {
    bool map_parallelization = 12;
  }

  // NOTE: field id 13 deleted in June 2021.
  reserved 13;

  // Whether to eliminate no-op transformations.
  oneof optional_noop_elimination {
    bool noop_elimination = 14;
  }
  // Whether to parallelize copying of batch elements. This optimization is
  // highly experimental and can cause performance degradation (e.g. when the
  // parallelization overhead exceeds the benefits of performing the data copies
  // in parallel). You should only enable this optimization if a) your input
  // pipeline is bottlenecked on batching and b) you have validated that this
  // optimization improves performance.
  oneof optional_parallel_batch {
    bool parallel_batch = 15;
  }
  // NOTE: field id 16 deleted in June 2021.
  reserved 16;
  // Whether to fuse shuffle and repeat transformations.
  oneof optional_shuffle_and_repeat_fusion {
    bool shuffle_and_repeat_fusion = 17;
  }
}

// Threading-related settings for a dataset.
//
// Both fields are wrapped in single-member oneofs (`optional_*`) so that
// "not set" can be told apart from an explicit value, including 0.
//
// next: 3 (the next unused field number)
message ThreadingOptions {
  // If set, it overrides the maximum degree of intra-op parallelism.
  oneof optional_max_intra_op_parallelism {
    int32 max_intra_op_parallelism = 1;
  }
  // If set, the dataset will use a private threadpool of the given size.
  oneof optional_private_threadpool_size {
    int32 private_threadpool_size = 2;
  }
}

// Represents how to handle external state during serialization.
enum ExternalStatePolicy {
  // External state is ignored and a warning is logged. This is the zero
  // value of the enum.
  POLICY_WARN = 0;
  // External state is ignored without a warning.
  POLICY_IGNORE = 1;
  // External state results in an error.
  POLICY_FAIL = 2;
}

// Message stored with Dataset objects to control how datasets are processed and
// optimized.
//
// Field numbers do not follow declaration order (autotune_options is declared
// second but uses id 7); the numbers are the wire identity and must not be
// changed to "tidy up".
//
// next: 8 (the next unused field number)
message Options {
  // Whether the outputs need to be produced in deterministic order.
  oneof optional_deterministic {
    bool deterministic = 1;
  }
  // The autotune options associated with the dataset.
  AutotuneOptions autotune_options = 7;
  // The distribution strategy options associated with the dataset.
  DistributeOptions distribute_options = 2;
  // The optimization options associated with the dataset.
  OptimizationOptions optimization_options = 3;
  // Whether to introduce 'slack' in the last `prefetch` of the input pipeline,
  // if it exists. This may reduce CPU contention with accelerator host-side
  // activity at the start of a step. The slack frequency is determined by the
  // number of devices attached to this input pipeline.
  oneof optional_slack {
    bool slack = 4;
  }
  // The threading options associated with the dataset.
  ThreadingOptions threading_options = 5;
  // This option can be used to override the default policy for how to handle
  // external state when serializing a dataset or checkpointing its iterator.
  // There are three settings available - IGNORE: External state is ignored
  // without a warning; WARN: External state is ignored and a warning is logged;
  // FAIL: External state results in an error.
  oneof optional_external_state_policy {
    ExternalStatePolicy external_state_policy = 6;
  }
}