Merge pull request #10279 from alexman-stripe/alexman-stripe/fix-cgroup-v2-wrong-cpu-usage-unit

agent: Fix CPU usage reporting for cgroup v2 in kata-agent
This commit is contained in:
Fabiano Fidêncio 2024-09-18 21:36:52 +02:00 committed by GitHub
commit 1597f8ba00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -724,9 +724,20 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> MessageField<CpuUsage> {
let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg); let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg);
let stat = cpu_controller.cpu().stat; let stat = cpu_controller.cpu().stat;
let h = lines_to_map(&stat); let h = lines_to_map(&stat);
let usage_in_usermode = *h.get("user_usec").unwrap_or(&0); // All fields in CpuUsage are expressed in nanoseconds (ns).
let usage_in_kernelmode = *h.get("system_usec").unwrap_or(&0); //
let total_usage = *h.get("usage_usec").unwrap_or(&0); // For cgroup v1 (cpuacct controller):
// kata-agent reads the cpuacct.stat file, which reports the number of ticks
// consumed by the processes in the cgroup. It then converts these ticks to nanoseconds.
// Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
//
// For cgroup v2 (cpu controller):
// kata-agent reads the cpu.stat file, which reports the time consumed by the
// processes in the cgroup in microseconds (us). It then converts microseconds to nanoseconds.
// Ref: https://www.kernel.org/doc/Documentation/cgroup-v2.txt, section 5-1-1. CPU Interface Files
let usage_in_usermode = *h.get("user_usec").unwrap_or(&0) * 1000;
let usage_in_kernelmode = *h.get("system_usec").unwrap_or(&0) * 1000;
let total_usage = *h.get("usage_usec").unwrap_or(&0) * 1000;
let percpu_usage = vec![]; let percpu_usage = vec![];
MessageField::some(CpuUsage { MessageField::some(CpuUsage {