diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock
index 89d9fb80ce..590e1e71e5 100644
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
@@ -9,6 +9,12 @@ dependencies = [
  "gimli",
 ]
 
+[[package]]
+name = "adler32"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
+
 [[package]]
 name = "aho-corasick"
 version = "0.7.10"
@@ -89,6 +95,15 @@ dependencies = [
  "time",
 ]
 
+[[package]]
+name = "crc32fast"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.4.2"
@@ -141,6 +156,12 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
 [[package]]
 name = "futures"
 version = "0.1.29"
@@ -170,6 +191,12 @@ version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c"
 
+[[package]]
+name = "hex"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35"
+
 [[package]]
 name = "itoa"
 version = "0.4.5"
@@ -188,6 +215,8 @@ dependencies = [
  "nix 0.17.0",
  "oci",
  "prctl",
+ "procfs",
+ "prometheus",
  "protobuf",
  "protocols",
  "regex",
@@ -214,6 +243,24 @@ version = "0.2.70"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3baa92041a6fec78c687fa0cc2b3fae8884f743d672cf551bed1d6dac6988d0f"
 
+[[package]]
+name = "libflate"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1fbe6b967a94346446d37ace319ae85be7eca261bb8149325811ac435d35d64"
+dependencies = [
+ "adler32",
+ "crc32fast",
+ "libflate_lz77",
+ "rle-decode-fast",
+]
+
+[[package]]
+name = "libflate_lz77"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3286f09f7d4926fc486334f28d8d2e6ebe4f7f9994494b6dab27ddfad2c9b11b"
+
 [[package]]
 name = "log"
 version = "0.4.8"
@@ -363,6 +410,37 @@ dependencies = [
  "unicode-xid",
 ]
 
+[[package]]
+name = "procfs"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c434e93ef69c216e68e4f417c927b4f31502c3560b72cfdb6827e2321c5c6b3e"
+dependencies = [
+ "bitflags",
+ "byteorder",
+ "chrono",
+ "hex",
+ "lazy_static",
+ "libc",
+ "libflate",
+]
+
+[[package]]
+name = "prometheus"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd0ced56dee39a6e960c15c74dc48849d614586db2eaada6497477af7c7811cd"
+dependencies = [
+ "cfg-if",
+ "fnv",
+ "lazy_static",
+ "libc",
+ "procfs",
+ "protobuf",
+ "spin",
+ "thiserror",
+]
+
 [[package]]
 name = "protobuf"
 version = "2.14.0"
@@ -480,6 +558,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "rle-decode-fast"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cabe4fa914dec5870285fa7f71f602645da47c486e68486d2b4ceb4a343e90ac"
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.16"
@@ -626,6 +710,12 @@ dependencies = [
  "slog",
 ]
 
+[[package]]
+name = "spin"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
+
 [[package]]
 name = "syn"
 version = "1.0.25"
@@ -657,6 +747,26 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "thiserror"
+version = "1.0.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b13f926965ad00595dd129fa12823b04bbf866e9085ab0a5f2b05b850fbfc344"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "893582086c2f98cde18f906265a65b5030a074b1046c674ae898be6519a7f479"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "thread_local"
 version = "1.0.1"
diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml
index b4b7d77481..b490e6f675 100644
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -30,6 +30,8 @@ slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "rele
 slog-scope = "4.1.2"
+prometheus = { version = "0.9.0", features = ["process"] }
+procfs = "0.7.9"
 # for testing
 tempfile = "3.1.0"
 
 [workspace]
 members = [
diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs
index 9b0c0405f0..014b677833 100644
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -12,6 +12,7 @@
 extern crate lazy_static;
 extern crate oci;
 extern crate prctl;
+extern crate prometheus;
 extern crate protocols;
 extern crate regex;
 extern crate rustjail;
@@ -51,6 +52,7 @@ use unistd::Pid;
 mod config;
 mod device;
 mod linux_abi;
+mod metrics;
 mod mount;
 mod namespace;
 mod network;
diff --git a/src/agent/src/metrics.rs b/src/agent/src/metrics.rs
new file mode 100644
index 0000000000..5538845c9d
--- /dev/null
+++ b/src/agent/src/metrics.rs
@@ -0,0 +1,538 @@
+//! Prometheus metrics for the agent process and the guest OS.
+//!
+//! [`get_metrics`] refreshes every gauge from procfs and returns all
+//! registered metrics encoded by the Prometheus `TextEncoder`.
+
+extern crate procfs;
+
+use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};
+
+use protocols;
+use rustjail::errors::*;
+
+const NAMESPACE_KATA_AGENT: &str = "kata_agent";
+const NAMESPACE_KATA_GUEST: &str = "kata_guest";
+
+// Convenience macro to obtain the scope logger
+macro_rules! sl {
+    () => {
+        slog_scope::logger().new(o!("subsystem" => "metrics"))
+    };
+}
+
+lazy_static! {
+    // agent process metrics
+    static ref AGENT_SCRAPE_COUNT: IntCounter =
+        prometheus::register_int_counter!(format!("{}_{}", NAMESPACE_KATA_AGENT, "scrape_count").as_ref(), "Metrics scrape count").unwrap();
+
+    static ref AGENT_THREADS: Gauge =
+        prometheus::register_gauge!(format!("{}_{}", NAMESPACE_KATA_AGENT, "threads").as_ref(), "Agent process threads").unwrap();
+
+    static ref AGENT_TOTAL_TIME: Gauge =
+        prometheus::register_gauge!(format!("{}_{}", NAMESPACE_KATA_AGENT, "total_time").as_ref(), "Agent process total time").unwrap();
+
+    static ref AGENT_TOTAL_VM: Gauge =
+        prometheus::register_gauge!(format!("{}_{}", NAMESPACE_KATA_AGENT, "total_vm").as_ref(), "Agent process total VM size").unwrap();
+
+    static ref AGENT_TOTAL_RSS: Gauge =
+        prometheus::register_gauge!(format!("{}_{}", NAMESPACE_KATA_AGENT, "total_rss").as_ref(), "Agent process total RSS size").unwrap();
+
+    static ref AGENT_PROC_STATUS: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_AGENT, "proc_status").as_ref(), "Agent process status.", &["item"]).unwrap();
+
+    static ref AGENT_IO_STAT: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_AGENT, "io_stat").as_ref(), "Agent process IO statistics.", &["item"]).unwrap();
+
+    static ref AGENT_PROC_STAT: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_AGENT, "proc_stat").as_ref(), "Agent process statistics.", &["item"]).unwrap();
+
+    // guest os metrics
+    static ref GUEST_LOAD: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "load").as_ref(), "Guest system load.", &["item"]).unwrap();
+
+    static ref GUEST_TASKS: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "tasks").as_ref(), "Guest system tasks.", &["item"]).unwrap();
+
+    static ref GUEST_CPU_TIME: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "cpu_time").as_ref(), "Guest CPU statistics.", &["cpu","item"]).unwrap();
+
+    static ref GUEST_VM_STAT: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "vm_stat").as_ref(), "Guest virtual memory statistics.", &["item"]).unwrap();
+
+    static ref GUEST_NETDEV_STAT: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "netdev_stat").as_ref(), "Guest net devices statistics.", &["interface","item"]).unwrap();
+
+    static ref GUEST_DISKSTAT: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "diskstat").as_ref(), "Disks statistics in system.", &["disk","item"]).unwrap();
+
+    static ref GUEST_MEMINFO: GaugeVec =
+        prometheus::register_gauge_vec!(format!("{}_{}", NAMESPACE_KATA_GUEST, "meminfo").as_ref(), "Statistics about memory usage in the system.", &["item"]).unwrap();
+}
+
+/// Refreshes all agent and guest gauges and returns every registered metric
+/// as text produced by the Prometheus `TextEncoder`.
+///
+/// The request argument is not inspected.
+///
+/// # Panics
+///
+/// Panics if encoding the gathered metrics fails or if the encoded bytes are
+/// not valid UTF-8.
+pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
+    AGENT_SCRAPE_COUNT.inc();
+
+    // update agent process metrics
+    update_agent_metrics();
+
+    // update guest os metrics
+    update_guest_metrics();
+
+    // gather all metrics and return as a String
+    let metric_families = prometheus::gather();
+
+    let mut buffer = Vec::new();
+    let encoder = TextEncoder::new();
+    encoder.encode(&metric_families, &mut buffer).unwrap();
+
+    // `buffer` is not needed afterwards, so move it instead of cloning.
+    Ok(String::from_utf8(buffer).unwrap())
+}
+
+/// Refreshes the agent process gauges from the agent's own procfs entries.
+///
+/// Every failure is logged; the gauges that depend on the failed read are
+/// left untouched.
+fn update_agent_metrics() {
+    let me = match procfs::process::Process::myself() {
+        Ok(process) => process,
+        Err(err) => {
+            error!(sl!(), "failed to create process instance: {:?}", err);
+            return;
+        }
+    };
+
+    // number of threads
+    AGENT_THREADS.set(me.stat.num_threads as f64);
+
+    // process total time: (utime + stime) divided by the ticks per second
+    match procfs::ticks_per_second() {
+        Err(err) => {
+            info!(sl!(), "failed to get ticks per second: {:?}", err);
+        }
+        Ok(tps) => {
+            AGENT_TOTAL_TIME.set((me.stat.utime + me.stat.stime) as f64 / (tps as f64));
+        }
+    }
+
+    // Total virtual memory used
+    AGENT_TOTAL_VM.set(me.stat.vsize as f64);
+
+    // Total resident set: rss multiplied by the page size
+    match procfs::page_size() {
+        Err(err) => {
+            info!(sl!(), "failed to get page size: {:?}", err);
+        }
+        Ok(page_size) => {
+            AGENT_TOTAL_RSS.set(me.stat.rss as f64 * page_size as f64);
+        }
+    }
+
+    // io
+    match me.io() {
+        Err(err) => {
+            info!(sl!(), "failed to get process io stat: {:?}", err);
+        }
+        Ok(io) => {
+            set_gauge_vec_proc_io(&AGENT_IO_STAT, &io);
+        }
+    }
+
+    match me.stat() {
+        Err(err) => {
+            info!(sl!(), "failed to get process stat: {:?}", err);
+        }
+        Ok(stat) => {
+            set_gauge_vec_proc_stat(&AGENT_PROC_STAT, &stat);
+        }
+    }
+
+    match me.status() {
+        Err(err) => {
+            info!(sl!(), "failed to get process status: {:?}", err);
+        }
+        Ok(status) => set_gauge_vec_proc_status(&AGENT_PROC_STATUS, &status),
+    }
+}
+
+/// Refreshes the guest OS gauges from the system-wide procfs entries.
+///
+/// Each source is read independently; a failed read is logged and skipped.
+fn update_guest_metrics() {
+    // try get load and task info
+    match procfs::LoadAverage::new() {
+        Err(err) => {
+            info!(sl!(), "failed to get guest LoadAverage: {:?}", err);
+        }
+        Ok(load) => {
+            GUEST_LOAD
+                .with_label_values(&["load1"])
+                .set(load.one as f64);
+            GUEST_LOAD
+                .with_label_values(&["load5"])
+                .set(load.five as f64);
+            GUEST_LOAD
+                .with_label_values(&["load15"])
+                .set(load.fifteen as f64);
+            GUEST_TASKS.with_label_values(&["cur"]).set(load.cur as f64);
+            GUEST_TASKS.with_label_values(&["max"]).set(load.max as f64);
+        }
+    }
+
+    // try to get disk stats
+    match procfs::diskstats() {
+        Err(err) => {
+            info!(sl!(), "failed to get guest diskstats: {:?}", err);
+        }
+        Ok(diskstats) => {
+            for diskstat in diskstats {
+                set_gauge_vec_diskstat(&GUEST_DISKSTAT, &diskstat);
+            }
+        }
+    }
+
+    // try to get vm stats
+    match procfs::vmstat() {
+        Err(err) => {
+            info!(sl!(), "failed to get guest vmstat: {:?}", err);
+        }
+        Ok(vmstat) => {
+            for (k, v) in vmstat {
+                GUEST_VM_STAT.with_label_values(&[k.as_str()]).set(v as f64);
+            }
+        }
+    }
+
+    // cpu stat
+    match procfs::KernelStats::new() {
+        Err(err) => {
+            info!(sl!(), "failed to get guest KernelStats: {:?}", err);
+        }
+        Ok(kernel_stats) => {
+            set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
+            for (i, cpu_time) in kernel_stats.cpu_time.iter().enumerate() {
+                set_gauge_vec_cpu_time(&GUEST_CPU_TIME, i.to_string().as_str(), cpu_time);
+            }
+        }
+    }
+
+    // try to get net device stats
+    match procfs::net::dev_status() {
+        Err(err) => {
+            info!(sl!(), "failed to get guest net::dev_status: {:?}", err);
+        }
+        Ok(devs) => {
+            // netdev: map[string]procfs::net::DeviceStatus
+            for (_, status) in devs {
+                set_gauge_vec_netdev(&GUEST_NETDEV_STAT, &status);
+            }
+        }
+    }
+
+    // get statistics about memory from /proc/meminfo
+    match procfs::Meminfo::new() {
+        Err(err) => {
+            info!(sl!(), "failed to get guest Meminfo: {:?}", err);
+        }
+        Ok(meminfo) => {
+            set_gauge_vec_meminfo(&GUEST_MEMINFO, &meminfo);
+        }
+    }
+}
+
+/// Sets the `meminfo` fields read below in `gv`, labelled by field name.
+///
+/// Optional fields that are absent are reported as 0.
+fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
+    gv.with_label_values(&["mem_total"])
+        .set(meminfo.mem_total as f64);
+    gv.with_label_values(&["mem_free"])
+        .set(meminfo.mem_free as f64);
+    gv.with_label_values(&["mem_available"])
+        .set(meminfo.mem_available.unwrap_or(0) as f64);
+    gv.with_label_values(&["buffers"])
+        .set(meminfo.buffers as f64);
+    gv.with_label_values(&["cached"]).set(meminfo.cached as f64);
+    gv.with_label_values(&["swap_cached"])
+        .set(meminfo.swap_cached as f64);
+    gv.with_label_values(&["active"]).set(meminfo.active as f64);
+    gv.with_label_values(&["inactive"])
+        .set(meminfo.inactive as f64);
+    gv.with_label_values(&["active_anon"])
+        .set(meminfo.active_anon.unwrap_or(0) as f64);
+    gv.with_label_values(&["inactive_anon"])
+        .set(meminfo.inactive_anon.unwrap_or(0) as f64);
+    gv.with_label_values(&["active_file"])
+        .set(meminfo.active_file.unwrap_or(0) as f64);
+    gv.with_label_values(&["inactive_file"])
+        .set(meminfo.inactive_file.unwrap_or(0) as f64);
+    gv.with_label_values(&["unevictable"])
+        .set(meminfo.unevictable.unwrap_or(0) as f64);
+    gv.with_label_values(&["mlocked"])
+        .set(meminfo.mlocked.unwrap_or(0) as f64);
+    gv.with_label_values(&["high_total"])
+        .set(meminfo.high_total.unwrap_or(0) as f64);
+    gv.with_label_values(&["high_free"])
+        .set(meminfo.high_free.unwrap_or(0) as f64);
+    gv.with_label_values(&["low_total"])
+        .set(meminfo.low_total.unwrap_or(0) as f64);
+    gv.with_label_values(&["low_free"])
+        .set(meminfo.low_free.unwrap_or(0) as f64);
+    gv.with_label_values(&["mmap_copy"])
+        .set(meminfo.mmap_copy.unwrap_or(0) as f64);
+    gv.with_label_values(&["swap_total"])
+        .set(meminfo.swap_total as f64);
+    gv.with_label_values(&["swap_free"])
+        .set(meminfo.swap_free as f64);
+    gv.with_label_values(&["dirty"]).set(meminfo.dirty as f64);
+    gv.with_label_values(&["writeback"])
+        .set(meminfo.writeback as f64);
+    gv.with_label_values(&["anon_pages"])
+        .set(meminfo.anon_pages.unwrap_or(0) as f64);
+    gv.with_label_values(&["mapped"]).set(meminfo.mapped as f64);
+    gv.with_label_values(&["shmem"])
+        .set(meminfo.shmem.unwrap_or(0) as f64);
+    gv.with_label_values(&["slab"]).set(meminfo.slab as f64);
+    gv.with_label_values(&["s_reclaimable"])
+        .set(meminfo.s_reclaimable.unwrap_or(0) as f64);
+    gv.with_label_values(&["s_unreclaim"])
+        .set(meminfo.s_unreclaim.unwrap_or(0) as f64);
+    gv.with_label_values(&["kernel_stack"])
+        .set(meminfo.kernel_stack.unwrap_or(0) as f64);
+    gv.with_label_values(&["page_tables"])
+        .set(meminfo.page_tables.unwrap_or(0) as f64);
+    gv.with_label_values(&["quicklists"])
+        .set(meminfo.quicklists.unwrap_or(0) as f64);
+    gv.with_label_values(&["nfs_unstable"])
+        .set(meminfo.nfs_unstable.unwrap_or(0) as f64);
+    gv.with_label_values(&["bounce"])
+        .set(meminfo.bounce.unwrap_or(0) as f64);
+    gv.with_label_values(&["writeback_tmp"])
+        .set(meminfo.writeback_tmp.unwrap_or(0) as f64);
+    gv.with_label_values(&["commit_limit"])
+        .set(meminfo.commit_limit.unwrap_or(0) as f64);
+    gv.with_label_values(&["committed_as"])
+        .set(meminfo.committed_as as f64);
+    gv.with_label_values(&["vmalloc_total"])
+        .set(meminfo.vmalloc_total as f64);
+    gv.with_label_values(&["vmalloc_used"])
+        .set(meminfo.vmalloc_used as f64);
+    gv.with_label_values(&["vmalloc_chunk"])
+        .set(meminfo.vmalloc_chunk as f64);
+    gv.with_label_values(&["hardware_corrupted"])
+        .set(meminfo.hardware_corrupted.unwrap_or(0) as f64);
+    gv.with_label_values(&["anon_hugepages"])
+        .set(meminfo.anon_hugepages.unwrap_or(0) as f64);
+    gv.with_label_values(&["shmem_hugepages"])
+        .set(meminfo.shmem_hugepages.unwrap_or(0) as f64);
+    gv.with_label_values(&["shmem_pmd_mapped"])
+        .set(meminfo.shmem_pmd_mapped.unwrap_or(0) as f64);
+    gv.with_label_values(&["cma_total"])
+        .set(meminfo.cma_total.unwrap_or(0) as f64);
+    gv.with_label_values(&["cma_free"])
+        .set(meminfo.cma_free.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugepages_total"])
+        .set(meminfo.hugepages_total.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugepages_free"])
+        .set(meminfo.hugepages_free.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugepages_rsvd"])
+        .set(meminfo.hugepages_rsvd.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugepages_surp"])
+        .set(meminfo.hugepages_surp.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugepagesize"])
+        .set(meminfo.hugepagesize.unwrap_or(0) as f64);
+    gv.with_label_values(&["direct_map_4k"])
+        .set(meminfo.direct_map_4k.unwrap_or(0) as f64);
+    gv.with_label_values(&["direct_map_4M"])
+        .set(meminfo.direct_map_4M.unwrap_or(0) as f64);
+    gv.with_label_values(&["direct_map_2M"])
+        .set(meminfo.direct_map_2M.unwrap_or(0) as f64);
+    gv.with_label_values(&["direct_map_1G"])
+        .set(meminfo.direct_map_1G.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugetlb"])
+        .set(meminfo.hugetlb.unwrap_or(0) as f64);
+    gv.with_label_values(&["per_cpu"])
+        .set(meminfo.per_cpu.unwrap_or(0) as f64);
+    gv.with_label_values(&["k_reclaimable"])
+        .set(meminfo.k_reclaimable.unwrap_or(0) as f64);
+}
+
+/// Sets the `cpu_time` fields read below in `gv` under the `cpu` label.
+///
+/// Optional fields that are absent are reported as 0.
+fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
+    gv.with_label_values(&[cpu, "user"])
+        .set(cpu_time.user as f64);
+    gv.with_label_values(&[cpu, "nice"])
+        .set(cpu_time.nice as f64);
+    gv.with_label_values(&[cpu, "system"])
+        .set(cpu_time.system as f64);
+    gv.with_label_values(&[cpu, "idle"])
+        .set(cpu_time.idle as f64);
+    gv.with_label_values(&[cpu, "iowait"])
+        .set(cpu_time.iowait.unwrap_or(0.0) as f64);
+    gv.with_label_values(&[cpu, "irq"])
+        .set(cpu_time.irq.unwrap_or(0.0) as f64);
+    gv.with_label_values(&[cpu, "softirq"])
+        .set(cpu_time.softirq.unwrap_or(0.0) as f64);
+    gv.with_label_values(&[cpu, "steal"])
+        .set(cpu_time.steal.unwrap_or(0.0) as f64);
+    gv.with_label_values(&[cpu, "guest"])
+        .set(cpu_time.guest.unwrap_or(0.0) as f64);
+    gv.with_label_values(&[cpu, "guest_nice"])
+        .set(cpu_time.guest_nice.unwrap_or(0.0) as f64);
+}
+
+/// Sets the `diskstat` fields read below in `gv`, labelled with the disk name.
+///
+/// Optional fields that are absent are reported as 0.
+fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat) {
+    let name = diskstat.name.as_str();
+
+    gv.with_label_values(&[name, "reads"])
+        .set(diskstat.reads as f64);
+    gv.with_label_values(&[name, "merged"])
+        .set(diskstat.merged as f64);
+    gv.with_label_values(&[name, "sectors_read"])
+        .set(diskstat.sectors_read as f64);
+    gv.with_label_values(&[name, "time_reading"])
+        .set(diskstat.time_reading as f64);
+    gv.with_label_values(&[name, "writes"])
+        .set(diskstat.writes as f64);
+    gv.with_label_values(&[name, "writes_merged"])
+        .set(diskstat.writes_merged as f64);
+    gv.with_label_values(&[name, "sectors_written"])
+        .set(diskstat.sectors_written as f64);
+    gv.with_label_values(&[name, "time_writing"])
+        .set(diskstat.time_writing as f64);
+    gv.with_label_values(&[name, "in_progress"])
+        .set(diskstat.in_progress as f64);
+    gv.with_label_values(&[name, "time_in_progress"])
+        .set(diskstat.time_in_progress as f64);
+    gv.with_label_values(&[name, "weighted_time_in_progress"])
+        .set(diskstat.weighted_time_in_progress as f64);
+    gv.with_label_values(&[name, "discards"])
+        .set(diskstat.discards.unwrap_or(0) as f64);
+    gv.with_label_values(&[name, "discards_merged"])
+        .set(diskstat.discards_merged.unwrap_or(0) as f64);
+    gv.with_label_values(&[name, "sectors_discarded"])
+        .set(diskstat.sectors_discarded.unwrap_or(0) as f64);
+    gv.with_label_values(&[name, "time_discarding"])
+        .set(diskstat.time_discarding.unwrap_or(0) as f64);
+    gv.with_label_values(&[name, "flushes"])
+        .set(diskstat.flushes.unwrap_or(0) as f64);
+    gv.with_label_values(&[name, "time_flushing"])
+        .set(diskstat.time_flushing.unwrap_or(0) as f64);
+}
+
+/// Sets the `status` fields read below in `gv`, labelled with the device
+/// name.
+fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceStatus) {
+    let name = status.name.as_str();
+
+    gv.with_label_values(&[name, "recv_bytes"])
+        .set(status.recv_bytes as f64);
+    gv.with_label_values(&[name, "recv_packets"])
+        .set(status.recv_packets as f64);
+    gv.with_label_values(&[name, "recv_errs"])
+        .set(status.recv_errs as f64);
+    gv.with_label_values(&[name, "recv_drop"])
+        .set(status.recv_drop as f64);
+    gv.with_label_values(&[name, "recv_fifo"])
+        .set(status.recv_fifo as f64);
+    gv.with_label_values(&[name, "recv_frame"])
+        .set(status.recv_frame as f64);
+    gv.with_label_values(&[name, "recv_compressed"])
+        .set(status.recv_compressed as f64);
+    gv.with_label_values(&[name, "recv_multicast"])
+        .set(status.recv_multicast as f64);
+    gv.with_label_values(&[name, "sent_bytes"])
+        .set(status.sent_bytes as f64);
+    gv.with_label_values(&[name, "sent_packets"])
+        .set(status.sent_packets as f64);
+    gv.with_label_values(&[name, "sent_errs"])
+        .set(status.sent_errs as f64);
+    gv.with_label_values(&[name, "sent_drop"])
+        .set(status.sent_drop as f64);
+    gv.with_label_values(&[name, "sent_fifo"])
+        .set(status.sent_fifo as f64);
+    gv.with_label_values(&[name, "sent_colls"])
+        .set(status.sent_colls as f64);
+    gv.with_label_values(&[name, "sent_carrier"])
+        .set(status.sent_carrier as f64);
+    gv.with_label_values(&[name, "sent_compressed"])
+        .set(status.sent_compressed as f64);
+}
+
+/// Sets the `status` fields read below in `gv`, labelled by field name.
+///
+/// Fields that are absent are reported as 0.
+fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process::Status) {
+    gv.with_label_values(&["vmpeak"])
+        .set(status.vmpeak.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmsize"])
+        .set(status.vmsize.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmlck"])
+        .set(status.vmlck.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmpin"])
+        .set(status.vmpin.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmhwm"])
+        .set(status.vmhwm.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmrss"])
+        .set(status.vmrss.unwrap_or(0) as f64);
+    gv.with_label_values(&["rssanon"])
+        .set(status.rssanon.unwrap_or(0) as f64);
+    gv.with_label_values(&["rssfile"])
+        .set(status.rssfile.unwrap_or(0) as f64);
+    gv.with_label_values(&["rssshmem"])
+        .set(status.rssshmem.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmdata"])
+        .set(status.vmdata.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmstk"])
+        .set(status.vmstk.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmexe"])
+        .set(status.vmexe.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmlib"])
+        .set(status.vmlib.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmpte"])
+        .set(status.vmpte.unwrap_or(0) as f64);
+    gv.with_label_values(&["vmswap"])
+        .set(status.vmswap.unwrap_or(0) as f64);
+    gv.with_label_values(&["hugetblpages"])
+        .set(status.hugetblpages.unwrap_or(0) as f64);
+    gv.with_label_values(&["voluntary_ctxt_switches"])
+        .set(status.voluntary_ctxt_switches.unwrap_or(0) as f64);
+    gv.with_label_values(&["nonvoluntary_ctxt_switches"])
+        .set(status.nonvoluntary_ctxt_switches.unwrap_or(0) as f64);
+}
+
+/// Sets the `io_stat` fields read below in `gv`, labelled by field name.
+fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::Io) {
+    gv.with_label_values(&["rchar"]).set(io_stat.rchar as f64);
+    gv.with_label_values(&["wchar"]).set(io_stat.wchar as f64);
+    gv.with_label_values(&["syscr"]).set(io_stat.syscr as f64);
+    gv.with_label_values(&["syscw"]).set(io_stat.syscw as f64);
+    gv.with_label_values(&["read_bytes"])
+        .set(io_stat.read_bytes as f64);
+    gv.with_label_values(&["write_bytes"])
+        .set(io_stat.write_bytes as f64);
+    gv.with_label_values(&["cancelled_write_bytes"])
+        .set(io_stat.cancelled_write_bytes as f64);
+}
+
+/// Sets the `utime`, `stime`, `cutime` and `cstime` of a process in `gv`.
+fn set_gauge_vec_proc_stat(gv: &prometheus::GaugeVec, stat: &procfs::process::Stat) {
+    gv.with_label_values(&["utime"]).set(stat.utime as f64);
+    gv.with_label_values(&["stime"]).set(stat.stime as f64);
+    gv.with_label_values(&["cutime"]).set(stat.cutime as f64);
+    gv.with_label_values(&["cstime"]).set(stat.cstime as f64);
+}
diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs
index 96298e382f..4176d4be1d 100644
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
@@ -10,7 +10,8 @@ use oci::{LinuxNamespace, Spec};
 use protobuf::{RepeatedField, SingularPtrField};
 use protocols::agent::{
     AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, ListProcessesResponse,
-    ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse, WriteStreamResponse,
+    Metrics, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse,
+    WriteStreamResponse,
 };
 use protocols::empty::Empty;
 use protocols::health::{
@@ -31,6 +32,7 @@ use rustjail::process::ProcessOperations;
 
 use crate::device::{add_devices, rescan_pci_bus, update_device_cgroup};
 use crate::linux_abi::*;
+use crate::metrics::get_metrics;
 use crate::mount::{add_storages, remove_mounts, STORAGEHANDLERLIST};
 use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
 use crate::random;
@@ -1265,6 +1267,25 @@ impl protocols::agent_ttrpc::AgentService for agentService {
 
         Ok(Empty::new())
     }
+
+    /// Returns the text produced by `crate::metrics::get_metrics` in a `Metrics` message.
+    fn get_metrics(
+        &self,
+        _ctx: &ttrpc::TtrpcContext,
+        req: protocols::agent::GetMetricsRequest,
+    ) -> ttrpc::Result<Metrics> {
+        match get_metrics(&req) {
+            Err(e) => Err(ttrpc::Error::RpcStatus(ttrpc::get_status(
+                ttrpc::Code::INTERNAL,
+                e.to_string(),
+            ))),
+            Ok(s) => {
+                let mut metrics = Metrics::new();
+                metrics.set_metrics(s);
+                Ok(metrics)
+            }
+        }
+    }
 }
 
 #[derive(Clone)]