// Message definitions for the DebugEvent file format used by the TensorFlow
// debugger (tfdbg).
syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/tensor.proto";
import "tensorflow/core/protobuf/graph_debug_info.proto";

option cc_enable_arenas = true;
option java_outer_classname = "DebugEventProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.util";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

// Available modes for extracting debugging information from a Tensor.
// TODO(cais): Document the detailed column names and semantics in a separate
// markdown file once the implementation settles.
enum TensorDebugMode {
  // Default (zero) value: no debug mode has been specified.
  UNSPECIFIED = 0;

  // Only records what tensors are computed, eagerly or in graphs.
  // No information regarding the value of the tensor is available.
  NO_TENSOR = 1;

  // A minimalist health summary for float-type tensors.
  // Contains information only about the presence/absence of pathological
  // values including Infinity and NaN.
  // Applicable only to float dtypes.
  CURT_HEALTH = 2;

  // A concise health summary for float-type tensors.
  // Contains more information than `CURT_HEALTH`.
  // Infinity and NaN are treated differently.
  // Applicable only to float and integer dtypes.
  CONCISE_HEALTH = 3;

  // A detailed health summary.
  // Contains further detailed information than `CONCISE_HEALTH`.
  // Information about device, dtype and shape are included.
  // Counts for various types of values (Infinity, NaN, negative, zero,
  // positive) are included.
  // Applicable to float, integer and boolean dtypes.
  FULL_HEALTH = 4;

  // Provides full runtime shape information, up to a maximum rank, beyond
  // which the dimension sizes are truncated.
  SHAPE = 5;

  // Full numeric summary.
  // Including device, dtype, shape, counts of various types of values
  // (Infinity, NaN, negative, zero, positive), and summary statistics
  // (minimum, maximum, mean and variance).
  // Applicable to float, integer and boolean dtypes.
  FULL_NUMERICS = 6;

  // Full tensor value.
  FULL_TENSOR = 7;

  // Reduce the elements of a tensor to a rank-1 tensor of shape [3], in which
  // - the 1st element is -inf if any element of the tensor is -inf,
  //   or zero otherwise.
  // - the 2nd element is +inf if any element of the tensor is +inf,
  //   or zero otherwise.
  // - the 3rd element is nan if any element of the tensor is nan, or zero
  //   otherwise.
  REDUCE_INF_NAN_THREE_SLOTS = 8;
}

// An Event related to the debugging of a TensorFlow program.
message DebugEvent {
  // Timestamp in seconds (with microsecond precision).
  double wall_time = 1;

  // Step of training (if available).
  int64 step = 2;

  // Payload of the event. At most one of the members below is set.
  // NOTE(review): field number 5 is skipped in this message; confirm it was
  // never used by an earlier revision before assigning it to a new field.
  oneof what {
    // Metadata related to this debugging data.
    DebugMetadata debug_metadata = 3;

    // The content of a source file.
    SourceFile source_file = 4;

    // A stack frame (filename, line number and column number, function name
    // and code string) with ID.
    StackFrameWithId stack_frame_with_id = 6;

    // The creation of an op within a graph (e.g., a FuncGraph compiled from
    // a Python function).
    GraphOpCreation graph_op_creation = 7;

    // Information about a debugged graph.
    DebuggedGraph debugged_graph = 8;

    // Execution of an op or a Graph (e.g., a tf.function).
    Execution execution = 9;

    // A graph execution trace: Contains information about the intermediate
    // tensors computed during the graph execution.
    GraphExecutionTrace graph_execution_trace = 10;

    // The ID of the graph (i.e., FuncGraph) executed here: applicable only
    // to the execution of a FuncGraph.
    string graph_id = 11;

    // A device on which debugger-instrumented ops and/or tensors reside.
    DebuggedDevice debugged_device = 12;
  }
}

// Metadata about the debugger and the debugged TensorFlow program.
message DebugMetadata {
  // Version of TensorFlow.
  string tensorflow_version = 1;

  // Version of the DebugEvent file format.
  // Has a format of "debug.Event:<number>", e.g., "debug.Event:1".
  string file_version = 2;

  // A unique ID for the current run of tfdbg.
  // A run of tfdbg is defined as a TensorFlow job instrumented by tfdbg.
  // Multiple hosts in a distributed TensorFlow job instrumented by tfdbg
  // have the same ID.
  string tfdbg_run_id = 3;
}

// Content of a source file involved in the execution of the debugged
// TensorFlow program.
message SourceFile {
  // Path to the file.
  string file_path = 1;

  // Name of the host on which the file is located.
  string host_name = 2;

  // Line-by-line content of the file.
  repeated string lines = 3;
}

// A stack frame with ID.
message StackFrameWithId {
  // A unique ID for the stack frame: A UUID-like string.
  string id = 1;

  // Stack frame, i.e., a frame of a stack trace, containing information
  // regarding the file name, line number, function name, code content
  // of the line, and column number (if available).
  GraphDebugInfo.FileLineCol file_line_col = 2;
}

// Code location information: A stack trace with host-name information.
// Instead of encoding the detailed stack trace, this proto refers to IDs of
// stack frames stored as `StackFrameWithId` protos.
message CodeLocation {
  // Host name on which the source files are located.
  string host_name = 1;

  // IDs of the stack frames that make up the stack trace; each ID refers to
  // a stack frame stored as a `StackFrameWithId` proto. The ordering of the
  // frames is consistent with Python's `traceback.extract_tb()`.
  repeated string stack_frame_ids = 2;
}

// The creation of an op in a TensorFlow Graph (e.g., FuncGraph in TF2).
message GraphOpCreation {
  // Type of the op (e.g., "MatMul").
  string op_type = 1;

  // Name of the op (e.g., "Dense/MatMul_1").
  string op_name = 2;

  // Name of the graph that the op is a part of (if available).
  string graph_name = 3;

  // Unique ID of the graph (generated by debugger).
  // This is the ID of the immediately-enclosing graph.
  string graph_id = 4;

  // Name of the device that the op is assigned to (if available).
  string device_name = 5;

  // Names of the input tensors to the op.
  repeated string input_names = 6;

  // Number of output tensors emitted by the op.
  int32 num_outputs = 7;

  // Code location (stack trace) of the op's creation.
  CodeLocation code_location = 8;

  // Unique IDs for the output tensors of this op.
  // NOTE(review): declared as int32 here, whereas
  // `Execution.output_tensor_ids` is int64 -- confirm the difference in
  // width is intentional.
  repeated int32 output_tensor_ids = 9;
}

// A debugger-instrumented graph.
message DebuggedGraph {
  // An ID for the graph.
  // This can be used to look up graph names. Generated by the debugger.
  string graph_id = 1;

  // Name of the graph (if available).
  string graph_name = 2;

  // Names of the instrumented ops. This can be used to look up op name
  // based on the numeric-summary tensors (2nd column).
  repeated string instrumented_ops = 3;

  // Original (uninstrumented) GraphDef (if available).
  bytes original_graph_def = 4;

  // An encoded version of a GraphDef.
  // This graph may include the debugger-inserted ops.
  bytes instrumented_graph_def = 5;

  // ID of the immediately-enclosing context (graph), if any.
  string outer_context_id = 6;
}

// A device on which ops and/or tensors are instrumented by the debugger.
message DebuggedDevice {
  // Name of the device.
  string device_name = 1;

  // A debugger-generated ID for the device. Guaranteed to be unique within
  // the scope of the debugged TensorFlow program, including single-host and
  // multi-host settings.
  // TODO(cais): Test the uniqueness guarantee in multi-host settings.
  int32 device_id = 2;
}

// Data relating to the eager execution of an op or a Graph.
// For an op that generates N output tensors (N >= 0), only one
// Execution proto will be used to describe the execution event.
message Execution {
  // Op type (e.g., "MatMul").
  // In the case of a Graph, this is the name of the Graph.
  string op_type = 1;

  // Number of output tensors.
  int32 num_outputs = 2;

  // The graph that's executed: applicable only to the eager
  // execution of a FuncGraph.
  string graph_id = 3;

  // IDs of the input tensors (if available).
  repeated int64 input_tensor_ids = 4;

  // IDs of the output tensors (if available).
  // If specified, must have the same length as `tensor_protos`.
  repeated int64 output_tensor_ids = 5;

  // Type of the tensor value encapsulated in this proto.
  TensorDebugMode tensor_debug_mode = 6;

  // Output Tensor values in the type described by `tensor_debug_mode`.
  // The length of this should match `num_outputs`.
  repeated TensorProto tensor_protos = 7;

  // Stack trace of the eager execution.
  CodeLocation code_location = 8;

  // Debugger-generated IDs of the devices on which the output tensors reside.
  // To look up details about the device (e.g., name), cross-reference this
  // field with the DebuggedDevice messages.
  repeated int32 output_tensor_device_ids = 9;

  // TODO(cais): When backporting to V1 Session.run() support, add more fields
  // such as fetches and feeds.
}

// Data relating to an execution of a Graph (e.g., an eager execution of a
// FuncGraph).
// The values of the intermediate tensors computed in the graph are recorded
// in this proto. A graph execution may correspond to one or more pieces of
// `GraphExecutionTrace`, depending on whether the instrumented tensor values
// are summarized in an aggregated or separate fashion.
message GraphExecutionTrace {
  // Unique ID of the context that the executed op(s) belong to (e.g., a
  // compiled concrete tf.function).
  string tfdbg_context_id = 1;

  // Name of the op (applicable only in the case of the `FULL_TENSOR` trace
  // level).
  string op_name = 2;

  // Output slot of the tensor (applicable only in the case of the
  // `FULL_TENSOR` trace level).
  int32 output_slot = 3;

  // Type of the tensor value encapsulated in this proto.
  TensorDebugMode tensor_debug_mode = 4;

  // Tensor value in the type described by `tensor_debug_mode`.
  // This tensor may summarize the value of a single intermediate op of the
  // graph, or those of multiple intermediate tensors.
  TensorProto tensor_proto = 5;

  // Name of the device that the op belongs to.
  string device_name = 6;
}