use crate::{MAX_NUM_INPUTS, MAX_NUM_MODELS, MAX_NUM_OUTPUTS}; use arrayvec::ArrayVec; use clap::Parser; use log::info; use once_cell::sync::OnceCell; use std::error::Error; use time::OffsetDateTime; use time::format_description::well_known::Rfc3339; #[derive(Parser, Debug, Clone)] ///Navi is configured through CLI arguments(for now) defined below. //TODO: use clap_serde to make it config file driven pub struct Args { #[clap(short, long, help = "gRPC port Navi runs ons")] pub port: i32, #[clap(long, default_value_t = 9000, help = "prometheus metrics port")] pub prometheus_port: u16, #[clap( short, long, default_value_t = 1, help = "number of worker threads for tokio async runtime" )] pub num_worker_threads: usize, #[clap( long, default_value_t = 14, help = "number of blocking threads in tokio blocking thread pool" )] pub max_blocking_threads: usize, #[clap(long, default_value = "16", help = "maximum batch size for a batch")] pub max_batch_size: Vec, #[clap( short, long, default_value = "2", help = "max wait time for accumulating a batch" )] pub batch_time_out_millis: Vec, #[clap( long, default_value_t = 90, help = "threshold to start dropping batches under stress" )] pub batch_drop_millis: u64, #[clap( long, default_value_t = 300, help = "polling interval for new version of a model and META.json config" )] pub model_check_interval_secs: u64, #[clap( short, long, default_value = "models/pvideo/", help = "root directory for models" )] pub model_dir: Vec, #[clap( long, help = "directory containing META.json config. separate from model_dir to facilitate remote config management" )] pub meta_json_dir: Option, #[clap(short, long, default_value = "", help = "directory for ssl certs")] pub ssl_dir: String, #[clap( long, help = "call out to external process to check model updates. custom logic can be written to pull from hdfs, gcs etc" )] pub modelsync_cli: Option, #[clap( long, default_value_t = 1, help = "specify how many versions Navi retains in memory. good for cases of rolling model upgrade" )] pub versions_per_model: usize, #[clap( short, long, help = "most runtimes support loading ops custom writen. currently only implemented for TF" )] pub customops_lib: Option, #[clap( long, default_value = "8", help = "number of threads to paralleling computations inside an op" )] pub intra_op_parallelism: Vec, #[clap( long, help = "number of threads to parallelize computations of the graph" )] pub inter_op_parallelism: Vec, #[clap( long, help = "signature of a serving. only TF" )] pub serving_sig: Vec, #[clap(long, default_value = "examples", help = "name of each input tensor")] pub input: Vec, #[clap(long, default_value = "output_0", help = "name of each output tensor")] pub output: Vec, #[clap( long, default_value_t = 500, help = "max warmup records to use. warmup only implemented for TF" )] pub max_warmup_records: usize, #[clap(long, value_parser = Args::parse_key_val::, value_delimiter=',')] pub onnx_global_thread_pool_options: Vec<(String, String)>, #[clap( long, default_value = "true", help = "when to use graph parallelization. only for ONNX" )] pub onnx_use_parallel_mode: String, // #[clap(long, default_value = "false")] // pub onnx_use_onednn: String, #[clap( long, default_value = "true", help = "trace internal memory allocation and generate bulk memory allocations. only for ONNX. turn if off if batch size dynamic" )] pub onnx_use_memory_pattern: String, #[clap(long, value_parser = Args::parse_key_val::, value_delimiter=',')] pub onnx_ep_options: Vec<(String, String)>, #[clap(long, help = "choice of gpu EPs for ONNX: cuda or tensorrt")] pub onnx_gpu_ep: Option, #[clap( long, default_value = "home", help = "converter for various input formats" )] pub onnx_use_converter: Option, #[clap( long, help = "whether to enable runtime profiling. only implemented for ONNX for now" )] pub profiling: Option, #[clap( long, default_value = "", help = "metrics reporting for discrete features. only for Home converter for now" )] pub onnx_report_discrete_feature_ids: Vec, #[clap( long, default_value = "", help = "metrics reporting for continuous features. only for Home converter for now" )] pub onnx_report_continuous_feature_ids: Vec, } impl Args { pub fn get_model_specs(model_dir: Vec) -> Vec { let model_specs = model_dir .iter() //let it panic if some model_dir are wrong .map(|dir| { dir.trim_end_matches('/') .rsplit_once('/') .unwrap() .1 .to_owned() }) .collect(); info!("all model_specs: {:?}", model_specs); model_specs } pub fn version_str_to_epoch(dt_str: &str) -> Result { dt_str .parse::() .or_else(|_| { let ts = OffsetDateTime::parse(dt_str, &Rfc3339) .map(|d| (d.unix_timestamp_nanos()/1_000_000) as i64); if ts.is_ok() { info!("original version {} -> {}", dt_str, ts.unwrap()); } ts }) .map_err(anyhow::Error::msg) } /// Parse a single key-value pair fn parse_key_val(s: &str) -> Result<(T, U), Box> where T: std::str::FromStr, T::Err: Error + Send + Sync + 'static, U: std::str::FromStr, U::Err: Error + Send + Sync + 'static, { let pos = s .find('=') .ok_or_else(|| format!("invalid KEY=value: no `=` found in `{}`", s))?; Ok((s[..pos].parse()?, s[pos + 1..].parse()?)) } } lazy_static! { pub static ref ARGS: Args = Args::parse(); pub static ref MODEL_SPECS: ArrayVec = { let mut specs = ArrayVec::::new(); Args::get_model_specs(ARGS.model_dir.clone()) .into_iter() .for_each(|m| specs.push(m)); specs }; pub static ref INPUTS: ArrayVec>, MAX_NUM_MODELS> = { let mut inputs = ArrayVec::>, MAX_NUM_MODELS>::new(); for (idx, o) in ARGS.input.iter().enumerate() { if o.trim().is_empty() { info!("input spec is empty for model {}, auto detect later", idx); inputs.push(OnceCell::new()); } else { inputs.push(OnceCell::with_value( o.split(",") .map(|s| s.to_owned()) .collect::>(), )); } } info!("all inputs:{:?}", inputs); inputs }; pub static ref OUTPUTS: ArrayVec, MAX_NUM_MODELS> = { let mut outputs = ArrayVec::, MAX_NUM_MODELS>::new(); for o in ARGS.output.iter() { outputs.push( o.split(",") .map(|s| s.to_owned()) .collect::>(), ); } info!("all outputs:{:?}", outputs); outputs }; }