diff --git a/src/dragonball/src/hypervisor_metrics.rs b/src/dragonball/src/hypervisor_metrics.rs index 228ed7c670..e502d6fe8d 100644 --- a/src/dragonball/src/hypervisor_metrics.rs +++ b/src/dragonball/src/hypervisor_metrics.rs @@ -5,11 +5,14 @@ extern crate procfs; -use crate::metric::{IncMetric, METRICS}; -use anyhow::{anyhow, Result}; -use prometheus::{Encoder, IntCounter, IntGaugeVec, Opts, Registry, TextEncoder}; use std::sync::Mutex; +use anyhow::{anyhow, Result}; +use dbs_utils::metric::IncMetric; +use prometheus::{Encoder, IntCounter, IntGaugeVec, Opts, Registry, TextEncoder}; + +use crate::metric::METRICS; + const NAMESPACE_KATA_HYPERVISOR: &str = "kata_hypervisor"; lazy_static! { @@ -23,7 +26,7 @@ lazy_static! { IntCounter::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"scrape_count"), "Hypervisor metrics scrape count.").unwrap(); static ref HYPERVISOR_VCPU: IntGaugeVec = - IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"vcpu"), "Hypervisor metrics specific to VCPUs' mode of functioning."), &["item"]).unwrap(); + IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"vcpu"), "Hypervisor metrics specific to VCPUs' mode of functioning."), &["cpu_id", "item"]).unwrap(); static ref HYPERVISOR_SECCOMP: IntGaugeVec = IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"seccomp"), "Hypervisor metrics for the seccomp filtering."), &["item"]).unwrap(); @@ -75,30 +78,33 @@ fn update_hypervisor_metrics() -> Result<()> { } fn set_intgauge_vec_vcpu(icv: &prometheus::IntGaugeVec) { - icv.with_label_values(&["exit_io_in"]) - .set(METRICS.vcpu.exit_io_in.count() as i64); - icv.with_label_values(&["exit_io_out"]) - .set(METRICS.vcpu.exit_io_out.count() as i64); - icv.with_label_values(&["exit_mmio_read"]) - .set(METRICS.vcpu.exit_mmio_read.count() as i64); - icv.with_label_values(&["exit_mmio_write"]) - .set(METRICS.vcpu.exit_mmio_write.count() as i64); - icv.with_label_values(&["failures"]) - 
.set(METRICS.vcpu.failures.count() as i64); - icv.with_label_values(&["filter_cpuid"]) - .set(METRICS.vcpu.filter_cpuid.count() as i64); + let metric_guard = METRICS.read().unwrap(); + for (cpu_id, metrics) in metric_guard.vcpu.iter() { + icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_io_in"]) + .set(metrics.exit_io_in.count() as i64); + icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_io_out"]) + .set(metrics.exit_io_out.count() as i64); + icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_mmio_read"]) + .set(metrics.exit_mmio_read.count() as i64); + icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_mmio_write"]) + .set(metrics.exit_mmio_write.count() as i64); + icv.with_label_values(&[cpu_id.to_string().as_str(), "failures"]) + .set(metrics.failures.count() as i64); + icv.with_label_values(&[cpu_id.to_string().as_str(), "filter_cpuid"]) + .set(metrics.filter_cpuid.count() as i64); + } } fn set_intgauge_vec_seccomp(icv: &prometheus::IntGaugeVec) { - let metric_gurad = METRICS.read().unwrap(); + let metric_guard = METRICS.read().unwrap(); icv.with_label_values(&["num_faults"]) - .set(metric_gurad.seccomp.num_faults.count() as i64); + .set(metric_guard.seccomp.num_faults.count() as i64); } fn set_intgauge_vec_signals(icv: &prometheus::IntGaugeVec) { - let metric_gurad = METRICS.read().unwrap(); + let metric_guard = METRICS.read().unwrap(); icv.with_label_values(&["sigbus"]) - .set(metric_gurad.signals.sigbus.count() as i64); + .set(metric_guard.signals.sigbus.count() as i64); icv.with_label_values(&["sigsegv"]) - .set(metric_gurad.signals.sigsegv.count() as i64); + .set(metric_guard.signals.sigsegv.count() as i64); } diff --git a/src/dragonball/src/metric.rs b/src/dragonball/src/metric.rs index 7ed2536987..01e61395b1 100644 --- a/src/dragonball/src/metric.rs +++ b/src/dragonball/src/metric.rs @@ -2,14 +2,19 @@ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 +use std::collections::HashMap; use std::sync::{Arc, RwLock}; -use dbs_utils::metric::{IncMetric, SharedIncMetric}; +use dbs_utils::metric::SharedIncMetric; use lazy_static::lazy_static; use serde::Serialize; lazy_static! { - /// Static instance used for handling metrics. + /// # Static instance used for handling metrics. + /// + /// Using a big lock over the DragonballMetrics since we have various device metric types + /// and the write operation is only used when creating or removing devices, it has a low + /// competitive overhead. pub static ref METRICS: RwLock<DragonballMetrics> = RwLock::new(DragonballMetrics::default()); } @@ -50,9 +55,121 @@ pub struct SignalMetrics { #[derive(Default, Serialize)] pub struct DragonballMetrics { /// Metrics related to a vcpu's functioning. - pub vcpu: VcpuMetrics, + pub vcpu: HashMap<u32, Arc<VcpuMetrics>>, /// Metrics related to seccomp filtering. pub seccomp: SeccompMetrics, /// Metrics related to signals. pub signals: SignalMetrics, } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::thread; + + use dbs_utils::metric::IncMetric; + + use crate::metric::{VcpuMetrics, METRICS}; + + #[test] + fn test_read_map() { + let metrics = Arc::new(VcpuMetrics::default()); + let vcpu_id: u32 = u32::MIN; + METRICS + .write() + .unwrap() + .vcpu + .insert(vcpu_id, metrics.clone()); + metrics.failures.inc(); + assert_eq!( + METRICS + .read() + .unwrap() + .vcpu + .get(&vcpu_id) + .unwrap() + .failures + .count(), + 1 + ); + } + + #[test] + fn test_metrics_count() { + let metrics = Arc::new(VcpuMetrics::default()); + let vcpu_id: u32 = 65535; + METRICS + .write() + .unwrap() + .vcpu + .insert(vcpu_id, metrics.clone()); + + let metrics1 = metrics.clone(); + let thread1 = thread::spawn(move || { + for _i in 0..10 { + metrics1.exit_io_in.inc(); + } + }); + + let metrics2 = metrics.clone(); + let thread2 = thread::spawn(move || { + for _i in 0..10 { + metrics2.exit_io_in.inc(); + } + }); + thread1.join().unwrap(); +
thread2.join().unwrap(); + assert_eq!( + METRICS + .read() + .unwrap() + .vcpu + .get(&vcpu_id) + .unwrap() + .exit_io_in + .count(), + 20 + ); + } + + #[test] + fn test_rw_lock() { + let metrics = Arc::new(VcpuMetrics::default()); + let vcpu_id: u32 = u32::MAX; + METRICS + .write() + .unwrap() + .vcpu + .insert(vcpu_id, metrics.clone()); + + let write_thread = thread::spawn(move || { + for _ in 0..10 { + let metrics = Arc::new(VcpuMetrics::default()); + let vcpu_id: u32 = 128; + METRICS + .write() + .unwrap() + .vcpu + .insert(vcpu_id, metrics.clone()); + } + }); + + let read_thread = thread::spawn(move || { + for _ in 0..10 { + assert_eq!( + METRICS + .read() + .unwrap() + .vcpu + .get(&vcpu_id) + .unwrap() + .failures + .count(), + 0 + ); + } + }); + write_thread.join().unwrap(); + read_thread.join().unwrap(); + } +} diff --git a/src/dragonball/src/vcpu/aarch64.rs b/src/dragonball/src/vcpu/aarch64.rs index ae45cd99d9..a5b76720d2 100644 --- a/src/dragonball/src/vcpu/aarch64.rs +++ b/src/dragonball/src/vcpu/aarch64.rs @@ -10,7 +10,6 @@ use std::ops::Deref; use std::sync::mpsc::{channel, Sender}; use std::sync::Arc; -use crate::IoManagerCached; use dbs_arch::{regs, VpmuFeatureLevel}; use dbs_boot::get_fdt_addr; use dbs_utils::time::TimestampUs; @@ -19,8 +18,10 @@ use vm_memory::{Address, GuestAddress, GuestAddressSpace}; use vmm_sys_util::eventfd::EventFd; use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::metric::VcpuMetrics; use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent}; use crate::vcpu::VcpuConfig; +use crate::IoManagerCached; #[allow(unused)] impl Vcpu { @@ -67,6 +68,7 @@ impl Vcpu { support_immediate_exit, mpidr: 0, exit_evt, + metrics: Arc::new(VcpuMetrics::default()), }) } diff --git a/src/dragonball/src/vcpu/vcpu_impl.rs b/src/dragonball/src/vcpu/vcpu_impl.rs index 3dffd579f1..83564119bd 100644 --- a/src/dragonball/src/vcpu/vcpu_impl.rs +++ b/src/dragonball/src/vcpu/vcpu_impl.rs @@ -15,6 +15,7 @@ use 
std::sync::mpsc::{Receiver, Sender, TryRecvError}; use std::sync::{Arc, Barrier}; use std::thread; +use dbs_utils::metric::IncMetric; use dbs_utils::time::TimestampUs; use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; use kvm_ioctls::{VcpuExit, VcpuFd}; @@ -25,7 +26,7 @@ use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::signal::{register_signal_handler, Killable}; use super::sm::StateMachine; -use crate::metric::{IncMetric, METRICS}; +use crate::metric::{VcpuMetrics, METRICS}; use crate::signal_handler::sigrtmin; use crate::IoManagerCached; @@ -303,6 +304,9 @@ pub struct Vcpu { // Whether kvm used supports immediate_exit flag. support_immediate_exit: bool, + // metrics for a vCPU. + metrics: Arc<VcpuMetrics>, + // CPUID information for the x86_64 CPU #[cfg(target_arch = "x86_64")] cpuid: kvm_bindings::CpuId, @@ -446,7 +450,7 @@ impl Vcpu { #[cfg(target_arch = "x86_64")] VcpuExit::IoIn(addr, data) => { let _ = self.io_mgr.pio_read(addr, data); - METRICS.vcpu.exit_io_in.inc(); + self.metrics.exit_io_in.inc(); Ok(VcpuEmulation::Handled) } #[cfg(target_arch = "x86_64")] VcpuExit::IoOut(addr, data) => { if !self.check_io_port_info(addr, data)? { let _ = self.io_mgr.pio_write(addr, data); } - METRICS.vcpu.exit_io_out.inc(); + self.metrics.exit_io_out.inc(); Ok(VcpuEmulation::Handled) } VcpuExit::MmioRead(addr, data) => { let _ = self.io_mgr.mmio_read(addr, data); - METRICS.vcpu.exit_mmio_read.inc(); + self.metrics.exit_mmio_read.inc(); Ok(VcpuEmulation::Handled) } VcpuExit::MmioWrite(addr, data) => { let _ = self.io_mgr.mmio_write(addr, data); - METRICS.vcpu.exit_mmio_write.inc(); + self.metrics.exit_mmio_write.inc(); Ok(VcpuEmulation::Handled) } VcpuExit::Hlt => { @@ -477,12 +481,12 @@ } // Documentation specifies that below kvm exits are considered errors. 
VcpuExit::FailEntry(reason, cpu) => { - METRICS.vcpu.failures.inc(); + self.metrics.failures.inc(); error!("Received KVM_EXIT_FAIL_ENTRY signal, reason {reason}, cpu number {cpu}"); Err(VcpuError::VcpuUnhandledKvmExit) } VcpuExit::InternalError => { - METRICS.vcpu.failures.inc(); + self.metrics.failures.inc(); error!("Received KVM_EXIT_INTERNAL_ERROR signal"); Err(VcpuError::VcpuUnhandledKvmExit) } @@ -495,7 +499,7 @@ Ok(VcpuEmulation::Stopped) } _ => { - METRICS.vcpu.failures.inc(); + self.metrics.failures.inc(); error!( "Received KVM_SYSTEM_EVENT signal type: {}, flag: {}", event_type, event_flags @@ -504,7 +508,7 @@ } }, r => { - METRICS.vcpu.failures.inc(); + self.metrics.failures.inc(); // TODO: Are we sure we want to finish running a vcpu upon // receiving a vm exit that is not necessarily an error? error!("Unexpected exit reason on vcpu run: {:?}", r); @@ -523,7 +527,7 @@ Ok(VcpuEmulation::Interrupted) } _ => { - METRICS.vcpu.failures.inc(); + self.metrics.failures.inc(); error!("Failure during vcpu run: {}", e); #[cfg(target_arch = "x86_64")] { @@ -731,7 +735,7 @@ impl Vcpu { fn waiting_exit(&mut self) -> StateMachine<Self> { // trigger vmm to stop machine if let Err(e) = self.exit_evt.write(1) { - METRICS.vcpu.failures.inc(); + self.metrics.failures.inc(); error!("Failed signaling vcpu exit event: {}", e); } @@ -765,11 +769,17 @@ pub fn vcpu_fd(&self) -> &VcpuFd { self.fd.as_ref() } + + pub fn metrics(&self) -> Arc<VcpuMetrics> { + self.metrics.clone() + } } impl Drop for Vcpu { fn drop(&mut self) { let _ = self.reset_thread_local_data(); + let id: u32 = self.id as u32; + METRICS.write().unwrap().vcpu.remove(&id); } } diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index dff3aefc3b..45d0541f48 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -29,6 +29,7 @@ use vmm_sys_util::eventfd::EventFd; use 
crate::address_space_manager::GuestAddressSpaceImpl; use crate::api::v1::InstanceInfo; use crate::kvm_context::KvmContext; +use crate::metric::METRICS; use crate::vcpu::vcpu_impl::{ Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResizeResult, VcpuResponse, VcpuStateEvent, }; @@ -555,6 +556,11 @@ impl VcpuManager { }; let mut vcpu = self.create_vcpu_arch(cpu_index, kvm_vcpu, request_ts)?; + METRICS + .write() + .unwrap() + .vcpu + .insert(cpu_index as u32, vcpu.metrics()); self.configure_single_vcpu(entry_addr, &mut vcpu) .map_err(VcpuManagerError::Vcpu)?; self.vcpu_infos[cpu_index as usize].vcpu = Some(vcpu); diff --git a/src/dragonball/src/vcpu/x86_64.rs b/src/dragonball/src/vcpu/x86_64.rs index f5616066cb..919f73127f 100644 --- a/src/dragonball/src/vcpu/x86_64.rs +++ b/src/dragonball/src/vcpu/x86_64.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use dbs_arch::cpuid::{process_cpuid, VmSpec}; use dbs_arch::gdt::gdt_entry; +use dbs_utils::metric::IncMetric; use dbs_utils::time::TimestampUs; use kvm_bindings::CpuId; use kvm_ioctls::{VcpuFd, VmFd}; @@ -19,7 +20,7 @@ use vm_memory::{Address, GuestAddress, GuestAddressSpace}; use vmm_sys_util::eventfd::EventFd; use crate::address_space_manager::GuestAddressSpaceImpl; -use crate::metric::{IncMetric, METRICS}; +use crate::metric::VcpuMetrics; use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent}; use crate::vcpu::VcpuConfig; use crate::IoManagerCached; @@ -69,6 +70,7 @@ impl Vcpu { vcpu_state_sender, exit_evt, support_immediate_exit, + metrics: Arc::new(VcpuMetrics::default()), cpuid, }) } @@ -137,7 +139,7 @@ impl Vcpu { ) .map_err(VcpuError::CpuId)?; process_cpuid(&mut self.cpuid, &cpuid_vm_spec).map_err(|e| { - METRICS.vcpu.filter_cpuid.inc(); + self.metrics.filter_cpuid.inc(); error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e); VcpuError::CpuId(e) })?;