mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-29 12:14:48 +00:00
dragonball: vcpu metrics change to be recorded per vcpu
This commit changes the vCPU metrics in Dragonball so that they are recorded per vCPU instead of in a single shared set of counters. Fixes: #7248 Signed-off-by: lisongqian <mail@lisongqian.cn>
This commit is contained in:
parent
fa60fbe023
commit
dbfe6512fc
@ -5,11 +5,14 @@
|
||||
|
||||
extern crate procfs;
|
||||
|
||||
use crate::metric::{IncMetric, METRICS};
|
||||
use anyhow::{anyhow, Result};
|
||||
use prometheus::{Encoder, IntCounter, IntGaugeVec, Opts, Registry, TextEncoder};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use dbs_utils::metric::IncMetric;
|
||||
use prometheus::{Encoder, IntCounter, IntGaugeVec, Opts, Registry, TextEncoder};
|
||||
|
||||
use crate::metric::METRICS;
|
||||
|
||||
const NAMESPACE_KATA_HYPERVISOR: &str = "kata_hypervisor";
|
||||
|
||||
lazy_static! {
|
||||
@ -23,7 +26,7 @@ lazy_static! {
|
||||
IntCounter::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"scrape_count"), "Hypervisor metrics scrape count.").unwrap();
|
||||
|
||||
static ref HYPERVISOR_VCPU: IntGaugeVec =
|
||||
IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"vcpu"), "Hypervisor metrics specific to VCPUs' mode of functioning."), &["item"]).unwrap();
|
||||
IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"vcpu"), "Hypervisor metrics specific to VCPUs' mode of functioning."), &["cpu_id", "item"]).unwrap();
|
||||
|
||||
static ref HYPERVISOR_SECCOMP: IntGaugeVec =
|
||||
IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"seccomp"), "Hypervisor metrics for the seccomp filtering."), &["item"]).unwrap();
|
||||
@ -75,30 +78,33 @@ fn update_hypervisor_metrics() -> Result<()> {
|
||||
}
|
||||
|
||||
fn set_intgauge_vec_vcpu(icv: &prometheus::IntGaugeVec) {
|
||||
icv.with_label_values(&["exit_io_in"])
|
||||
.set(METRICS.vcpu.exit_io_in.count() as i64);
|
||||
icv.with_label_values(&["exit_io_out"])
|
||||
.set(METRICS.vcpu.exit_io_out.count() as i64);
|
||||
icv.with_label_values(&["exit_mmio_read"])
|
||||
.set(METRICS.vcpu.exit_mmio_read.count() as i64);
|
||||
icv.with_label_values(&["exit_mmio_write"])
|
||||
.set(METRICS.vcpu.exit_mmio_write.count() as i64);
|
||||
icv.with_label_values(&["failures"])
|
||||
.set(METRICS.vcpu.failures.count() as i64);
|
||||
icv.with_label_values(&["filter_cpuid"])
|
||||
.set(METRICS.vcpu.filter_cpuid.count() as i64);
|
||||
let metric_guard = METRICS.read().unwrap();
|
||||
for (cpu_id, metrics) in metric_guard.vcpu.iter() {
|
||||
icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_io_in"])
|
||||
.set(metrics.exit_io_in.count() as i64);
|
||||
icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_io_out"])
|
||||
.set(metrics.exit_io_out.count() as i64);
|
||||
icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_mmio_read"])
|
||||
.set(metrics.exit_mmio_read.count() as i64);
|
||||
icv.with_label_values(&[cpu_id.to_string().as_str(), "exit_mmio_write"])
|
||||
.set(metrics.exit_mmio_write.count() as i64);
|
||||
icv.with_label_values(&[cpu_id.to_string().as_str(), "failures"])
|
||||
.set(metrics.failures.count() as i64);
|
||||
icv.with_label_values(&[cpu_id.to_string().as_str(), "filter_cpuid"])
|
||||
.set(metrics.filter_cpuid.count() as i64);
|
||||
}
|
||||
}
|
||||
|
||||
fn set_intgauge_vec_seccomp(icv: &prometheus::IntGaugeVec) {
|
||||
let metric_gurad = METRICS.read().unwrap();
|
||||
let metric_guard = METRICS.read().unwrap();
|
||||
icv.with_label_values(&["num_faults"])
|
||||
.set(metric_gurad.seccomp.num_faults.count() as i64);
|
||||
.set(metric_guard.seccomp.num_faults.count() as i64);
|
||||
}
|
||||
|
||||
fn set_intgauge_vec_signals(icv: &prometheus::IntGaugeVec) {
|
||||
let metric_gurad = METRICS.read().unwrap();
|
||||
let metric_guard = METRICS.read().unwrap();
|
||||
icv.with_label_values(&["sigbus"])
|
||||
.set(metric_gurad.signals.sigbus.count() as i64);
|
||||
.set(metric_guard.signals.sigbus.count() as i64);
|
||||
icv.with_label_values(&["sigsegv"])
|
||||
.set(metric_gurad.signals.sigsegv.count() as i64);
|
||||
.set(metric_guard.signals.sigsegv.count() as i64);
|
||||
}
|
||||
|
@ -2,14 +2,19 @@
|
||||
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use dbs_utils::metric::{IncMetric, SharedIncMetric};
|
||||
use dbs_utils::metric::SharedIncMetric;
|
||||
use lazy_static::lazy_static;
|
||||
use serde::Serialize;
|
||||
|
||||
lazy_static! {
    /// Static instance used for handling metrics.
    ///
    /// A single big lock guards the whole `DragonballMetrics` value because it holds
    /// several kinds of metrics. The write lock is only taken when entries are added
    /// or removed (for example when a vCPU is created or dropped), so contention is
    /// low; regular counter updates do not need the write lock.
    pub static ref METRICS: RwLock<DragonballMetrics> = RwLock::new(DragonballMetrics::default());
}
|
||||
|
||||
@ -50,9 +55,121 @@ pub struct SignalMetrics {
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct DragonballMetrics {
|
||||
/// Metrics related to a vcpu's functioning.
|
||||
pub vcpu: VcpuMetrics,
|
||||
pub vcpu: HashMap<u32, Arc<VcpuMetrics>>,
|
||||
/// Metrics related to seccomp filtering.
|
||||
pub seccomp: SeccompMetrics,
|
||||
/// Metrics related to signals.
|
||||
pub signals: SignalMetrics,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::thread;

    use dbs_utils::metric::IncMetric;

    use crate::metric::{VcpuMetrics, METRICS};

    /// Creates default `VcpuMetrics`, stores a shared handle in the global `METRICS`
    /// map under `id`, and returns the other handle to the caller.
    fn register_vcpu(id: u32) -> Arc<VcpuMetrics> {
        let handle = Arc::new(VcpuMetrics::default());
        METRICS
            .write()
            .unwrap()
            .vcpu
            .insert(id, Arc::clone(&handle));
        handle
    }

    /// An increment made through the local handle is visible through the global map.
    #[test]
    fn test_read_map() {
        let id: u32 = u32::MIN;
        let handle = register_vcpu(id);
        handle.failures.inc();

        let registry = METRICS.read().unwrap();
        assert_eq!(registry.vcpu.get(&id).unwrap().failures.count(), 1);
    }

    /// Increments coming from two threads are all accounted for.
    #[test]
    fn test_metrics_count() {
        let id: u32 = 65535;
        let handle = register_vcpu(id);

        // Start both workers before joining either of them.
        let workers: Vec<_> = (0..2)
            .map(|_| {
                let shared = Arc::clone(&handle);
                thread::spawn(move || {
                    (0..10).for_each(|_| shared.exit_io_in.inc());
                })
            })
            .collect();
        for worker in workers {
            worker.join().unwrap();
        }

        let registry = METRICS.read().unwrap();
        assert_eq!(registry.vcpu.get(&id).unwrap().exit_io_in.count(), 20);
    }

    /// A reader keeps seeing its own entry while a writer inserts another one.
    #[test]
    fn test_rw_lock() {
        let id: u32 = u32::MAX;
        let _handle = register_vcpu(id);

        let writer = thread::spawn(|| {
            let other_id: u32 = 128;
            for _ in 0..10 {
                METRICS
                    .write()
                    .unwrap()
                    .vcpu
                    .insert(other_id, Arc::new(VcpuMetrics::default()));
            }
        });

        let reader = thread::spawn(move || {
            for _ in 0..10 {
                let registry = METRICS.read().unwrap();
                assert_eq!(registry.vcpu.get(&id).unwrap().failures.count(), 0);
            }
        });

        writer.join().unwrap();
        reader.join().unwrap();
    }
}
|
||||
|
@ -10,7 +10,6 @@ use std::ops::Deref;
|
||||
use std::sync::mpsc::{channel, Sender};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::IoManagerCached;
|
||||
use dbs_arch::{regs, VpmuFeatureLevel};
|
||||
use dbs_boot::get_fdt_addr;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
@ -19,8 +18,10 @@ use vm_memory::{Address, GuestAddress, GuestAddressSpace};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
use crate::address_space_manager::GuestAddressSpaceImpl;
|
||||
use crate::metric::VcpuMetrics;
|
||||
use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent};
|
||||
use crate::vcpu::VcpuConfig;
|
||||
use crate::IoManagerCached;
|
||||
|
||||
#[allow(unused)]
|
||||
impl Vcpu {
|
||||
@ -67,6 +68,7 @@ impl Vcpu {
|
||||
support_immediate_exit,
|
||||
mpidr: 0,
|
||||
exit_evt,
|
||||
metrics: Arc::new(VcpuMetrics::default()),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ use std::sync::mpsc::{Receiver, Sender, TryRecvError};
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::thread;
|
||||
|
||||
use dbs_utils::metric::IncMetric;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
|
||||
use kvm_ioctls::{VcpuExit, VcpuFd};
|
||||
@ -25,7 +26,7 @@ use vmm_sys_util::eventfd::EventFd;
|
||||
use vmm_sys_util::signal::{register_signal_handler, Killable};
|
||||
|
||||
use super::sm::StateMachine;
|
||||
use crate::metric::{IncMetric, METRICS};
|
||||
use crate::metric::{VcpuMetrics, METRICS};
|
||||
use crate::signal_handler::sigrtmin;
|
||||
use crate::IoManagerCached;
|
||||
|
||||
@ -303,6 +304,9 @@ pub struct Vcpu {
|
||||
// Whether kvm used supports immediate_exit flag.
|
||||
support_immediate_exit: bool,
|
||||
|
||||
// metrics for a vCPU.
|
||||
metrics: Arc<VcpuMetrics>,
|
||||
|
||||
// CPUID information for the x86_64 CPU
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
cpuid: kvm_bindings::CpuId,
|
||||
@ -446,7 +450,7 @@ impl Vcpu {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
VcpuExit::IoIn(addr, data) => {
|
||||
let _ = self.io_mgr.pio_read(addr, data);
|
||||
METRICS.vcpu.exit_io_in.inc();
|
||||
self.metrics.exit_io_in.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
@ -454,17 +458,17 @@ impl Vcpu {
|
||||
if !self.check_io_port_info(addr, data)? {
|
||||
let _ = self.io_mgr.pio_write(addr, data);
|
||||
}
|
||||
METRICS.vcpu.exit_io_out.inc();
|
||||
self.metrics.exit_io_out.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
VcpuExit::MmioRead(addr, data) => {
|
||||
let _ = self.io_mgr.mmio_read(addr, data);
|
||||
METRICS.vcpu.exit_mmio_read.inc();
|
||||
self.metrics.exit_mmio_read.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
VcpuExit::MmioWrite(addr, data) => {
|
||||
let _ = self.io_mgr.mmio_write(addr, data);
|
||||
METRICS.vcpu.exit_mmio_write.inc();
|
||||
self.metrics.exit_mmio_write.inc();
|
||||
Ok(VcpuEmulation::Handled)
|
||||
}
|
||||
VcpuExit::Hlt => {
|
||||
@ -477,12 +481,12 @@ impl Vcpu {
|
||||
}
|
||||
// Documentation specifies that below kvm exits are considered errors.
|
||||
VcpuExit::FailEntry(reason, cpu) => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
self.metrics.failures.inc();
|
||||
error!("Received KVM_EXIT_FAIL_ENTRY signal, reason {reason}, cpu number {cpu}");
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
VcpuExit::InternalError => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
self.metrics.failures.inc();
|
||||
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
|
||||
Err(VcpuError::VcpuUnhandledKvmExit)
|
||||
}
|
||||
@ -495,7 +499,7 @@ impl Vcpu {
|
||||
Ok(VcpuEmulation::Stopped)
|
||||
}
|
||||
_ => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
self.metrics.failures.inc();
|
||||
error!(
|
||||
"Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
|
||||
event_type, event_flags
|
||||
@ -504,7 +508,7 @@ impl Vcpu {
|
||||
}
|
||||
},
|
||||
r => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
self.metrics.failures.inc();
|
||||
// TODO: Are we sure we want to finish running a vcpu upon
|
||||
// receiving a vm exit that is not necessarily an error?
|
||||
error!("Unexpected exit reason on vcpu run: {:?}", r);
|
||||
@ -523,7 +527,7 @@ impl Vcpu {
|
||||
Ok(VcpuEmulation::Interrupted)
|
||||
}
|
||||
_ => {
|
||||
METRICS.vcpu.failures.inc();
|
||||
self.metrics.failures.inc();
|
||||
error!("Failure during vcpu run: {}", e);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
@ -731,7 +735,7 @@ impl Vcpu {
|
||||
fn waiting_exit(&mut self) -> StateMachine<Self> {
|
||||
// trigger vmm to stop machine
|
||||
if let Err(e) = self.exit_evt.write(1) {
|
||||
METRICS.vcpu.failures.inc();
|
||||
self.metrics.failures.inc();
|
||||
error!("Failed signaling vcpu exit event: {}", e);
|
||||
}
|
||||
|
||||
@ -765,11 +769,17 @@ impl Vcpu {
|
||||
pub fn vcpu_fd(&self) -> &VcpuFd {
|
||||
self.fd.as_ref()
|
||||
}
|
||||
|
||||
/// Returns a shared handle to this vCPU's metrics.
///
/// Only the `Arc` reference count is bumped; the counters themselves are not copied.
pub fn metrics(&self) -> Arc<VcpuMetrics> {
    Arc::clone(&self.metrics)
}
|
||||
}
|
||||
|
||||
impl Drop for Vcpu {
|
||||
fn drop(&mut self) {
|
||||
let _ = self.reset_thread_local_data();
|
||||
let id: u32 = self.id as u32;
|
||||
METRICS.write().unwrap().vcpu.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@ use vmm_sys_util::eventfd::EventFd;
|
||||
use crate::address_space_manager::GuestAddressSpaceImpl;
|
||||
use crate::api::v1::InstanceInfo;
|
||||
use crate::kvm_context::KvmContext;
|
||||
use crate::metric::METRICS;
|
||||
use crate::vcpu::vcpu_impl::{
|
||||
Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResizeResult, VcpuResponse, VcpuStateEvent,
|
||||
};
|
||||
@ -555,6 +556,11 @@ impl VcpuManager {
|
||||
};
|
||||
|
||||
let mut vcpu = self.create_vcpu_arch(cpu_index, kvm_vcpu, request_ts)?;
|
||||
METRICS
|
||||
.write()
|
||||
.unwrap()
|
||||
.vcpu
|
||||
.insert(cpu_index as u32, vcpu.metrics());
|
||||
self.configure_single_vcpu(entry_addr, &mut vcpu)
|
||||
.map_err(VcpuManagerError::Vcpu)?;
|
||||
self.vcpu_infos[cpu_index as usize].vcpu = Some(vcpu);
|
||||
|
@ -11,6 +11,7 @@ use std::sync::Arc;
|
||||
|
||||
use dbs_arch::cpuid::{process_cpuid, VmSpec};
|
||||
use dbs_arch::gdt::gdt_entry;
|
||||
use dbs_utils::metric::IncMetric;
|
||||
use dbs_utils::time::TimestampUs;
|
||||
use kvm_bindings::CpuId;
|
||||
use kvm_ioctls::{VcpuFd, VmFd};
|
||||
@ -19,7 +20,7 @@ use vm_memory::{Address, GuestAddress, GuestAddressSpace};
|
||||
use vmm_sys_util::eventfd::EventFd;
|
||||
|
||||
use crate::address_space_manager::GuestAddressSpaceImpl;
|
||||
use crate::metric::{IncMetric, METRICS};
|
||||
use crate::metric::VcpuMetrics;
|
||||
use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent};
|
||||
use crate::vcpu::VcpuConfig;
|
||||
use crate::IoManagerCached;
|
||||
@ -69,6 +70,7 @@ impl Vcpu {
|
||||
vcpu_state_sender,
|
||||
exit_evt,
|
||||
support_immediate_exit,
|
||||
metrics: Arc::new(VcpuMetrics::default()),
|
||||
cpuid,
|
||||
})
|
||||
}
|
||||
@ -137,7 +139,7 @@ impl Vcpu {
|
||||
)
|
||||
.map_err(VcpuError::CpuId)?;
|
||||
process_cpuid(&mut self.cpuid, &cpuid_vm_spec).map_err(|e| {
|
||||
METRICS.vcpu.filter_cpuid.inc();
|
||||
self.metrics.filter_cpuid.inc();
|
||||
error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e);
|
||||
VcpuError::CpuId(e)
|
||||
})?;
|
||||
|
Loading…
Reference in New Issue
Block a user