the-algorithm/navi/navi/proto/tensorflow/core/protobuf/data_service.proto

syntax = "proto3";

package tensorflow.data;

option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";
// Next tag: 2
message ProcessingModeDef {
  // Specifies how data is sharded among tf.data service workers.
  enum ShardingPolicy {
    // No sharding will be performed. Each worker produces the entire dataset
    // without any sharding. With this mode, the best practice is to shuffle the
    // dataset nondeterministically so that workers process the dataset in
    // different orders.
    OFF = 0;

    // The input dataset is dynamically split among workers at runtime. Each
    // worker gets the next split when it reads data from the dispatcher. There
    // is no fixed sharding with this mode.
    DYNAMIC = 1;

    // The following are static sharding policies. The semantics are similar to
    // `tf.data.experimental.AutoShardPolicy`. These policies require:
    // * The tf.data service cluster has a fixed size, and you need to specify
    //   the workers in DispatcherConfig.
    // * Each client only reads from the local tf.data service worker.
    //
    // Shards by input files (each worker will get a set of files to process).
    // When this option is selected, make sure that there are at least as many
    // files as workers. If there are fewer input files than workers, a runtime
    // error will be raised.
    FILE = 2;

    // Shards by elements produced by the dataset. Each worker will process the
    // whole dataset and discard the portion that is not for itself. Note that
    // for this mode to correctly partition the dataset elements, the dataset
    // needs to produce elements in a deterministic order.
    DATA = 3;

    // Attempts FILE-based sharding, falling back to DATA-based sharding on
    // failure.
    FILE_OR_DATA = 4;

    // Looks for the presence of `shard(SHARD_HINT, ...)`, which is treated as a
    // placeholder to replace with `shard(num_workers, worker_index)`.
    HINT = 5;
  }

  ShardingPolicy sharding_policy = 1;
}
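
On the client side, these policies surface as `tf.data.experimental.service.ShardingPolicy` values passed to `tf.data.experimental.service.distribute`. A minimal Python sketch, assuming a dispatcher reachable at the placeholder address `grpc://dispatcher:5050` (the address and the toy dataset are illustrative, not part of this proto):

```python
import tensorflow as tf

# Placeholder dispatcher address; in a real deployment it comes from the
# cluster's DispatcherConfig.
SERVICE = "grpc://dispatcher:5050"

# DYNAMIC sharding: the dispatcher hands out splits to workers at runtime.
ds = tf.data.Dataset.range(1000)
ds = ds.apply(
    tf.data.experimental.service.distribute(
        processing_mode=tf.data.experimental.service.ShardingPolicy.DYNAMIC,
        service=SERVICE))

# HINT sharding: tf.data.SHARD_HINT marks the shard() call that the service
# replaces with shard(num_workers, worker_index).
hinted = tf.data.Dataset.range(1000)
hinted = hinted.shard(tf.data.SHARD_HINT, tf.data.SHARD_HINT)
hinted = hinted.apply(
    tf.data.experimental.service.distribute(
        processing_mode=tf.data.experimental.service.ShardingPolicy.HINT,
        service=SERVICE))
```

With OFF, every worker produces every element, which is why the comment above recommends nondeterministic shuffling so that workers do not emit identical streams.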
// tf.data service deployment mode.
enum DeploymentMode {
  DEPLOYMENT_MODE_UNSPECIFIED = 0;

  // tf.data service workers are colocated with TF workers.
  DEPLOYMENT_MODE_COLOCATED = 1;

  // tf.data service workers run on dedicated tf.data hosts.
  DEPLOYMENT_MODE_REMOTE = 2;

  // tf.data service workers run both on colocated TF hosts and on dedicated
  // tf.data hosts.
  DEPLOYMENT_MODE_HYBRID = 3;
}
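
For a rough picture of how these modes differ in practice, the Python API can start a dispatcher and a worker in-process; a sketch with placeholder ports, where a DEPLOYMENT_MODE_REMOTE setup would run the same WorkerServer on a separate, dedicated tf.data host:

```python
import tensorflow as tf

# Dispatcher for the tf.data service cluster (placeholder port).
dispatcher = tf.data.experimental.service.DispatchServer(
    tf.data.experimental.service.DispatcherConfig(port=5050))

# A worker pointed at the dispatcher. On a dedicated tf.data host (REMOTE) or
# on each TF worker (COLOCATED) this would run as its own process.
worker = tf.data.experimental.service.WorkerServer(
    tf.data.experimental.service.WorkerConfig(
        dispatcher_address=dispatcher.target.split("://")[1]))

print("dispatcher target:", dispatcher.target)  # e.g. grpc://localhost:5050
```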
// Metadata related to tf.data service datasets.
// Next tag: 4
message DataServiceMetadata {
  oneof optional_element_spec {
    // Serialized element spec.
    bytes element_spec = 1;
  }

  enum Compression {
    COMPRESSION_UNSPECIFIED = 0;

    // No compression.
    COMPRESSION_OFF = 1;

    // Snappy compression as defined in tensorflow/core/platform/snappy.h.
    COMPRESSION_SNAPPY = 2;
  }

  Compression compression = 2;

  // Cardinality of the dataset.
  int64 cardinality = 3;
}
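
As an illustration of how this message is populated, the sketch below builds a DataServiceMetadata through the generated Python bindings. The `data_service_pb2` module path is an assumption based on this file's location in the TensorFlow tree, and the cardinality sentinel follows `tf.data`'s conventions:

```python
import tensorflow as tf
from tensorflow.core.protobuf import data_service_pb2  # assumed module path

metadata = data_service_pb2.DataServiceMetadata(
    compression=data_service_pb2.DataServiceMetadata.COMPRESSION_SNAPPY,
    # -2 means unknown; tf.data.INFINITE_CARDINALITY (-1) marks infinite datasets.
    cardinality=tf.data.UNKNOWN_CARDINALITY,
)
print(metadata)
```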
// Data service config available to the client through GetDataServiceConfig RPC.
// Next tag: 2
message DataServiceConfig {
  DeploymentMode deployment_mode = 1;
}