From aaa96c749b80d1b3056beac32087981449909e79 Mon Sep 17 00:00:00 2001
From: Yushuo <y-shuo@linux.alibaba.com>
Date: Tue, 9 May 2023 14:35:44 +0800
Subject: [PATCH] feat(runtime-rs): modify onlineCpuMemRequest

Some vmms, such as dragonball, will actively help us
perform online cpu operations when doing cpu hotplug.
Under the old onlineCpuMem interface, it is difficult
to adapt to this situation.

So we modify the semantics of nb_cpus in onlineCpuMemRequest.
In the original semantics, nb_cpus represents the number of
newly added CPUs that need to be online. The modified
semantics become that the number of online CPUs in the guest
needs to be guaranteed.

Fixes: #5030

Signed-off-by: Yushuo <y-shuo@linux.alibaba.com>
Signed-off-by: Ji-Xinyou <jerryji0414@outlook.com>
---
 src/agent/src/linux_abi.rs                    |  3 +-
 src/agent/src/sandbox.rs                      | 44 ++++++++++++++-----
 src/libs/protocols/protos/agent.proto         |  3 +-
 .../crates/resource/src/cpu_mem/cpu.rs        |  2 +-
 src/runtime/virtcontainers/agent.go           |  3 +-
 .../pkg/agent/protocols/grpc/agent.pb.go      |  3 +-
 src/runtime/virtcontainers/sandbox.go         |  5 +--
 src/runtime/virtcontainers/vm.go              |  2 +-
 8 files changed, 44 insertions(+), 21 deletions(-)
diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs
index 042acd0aed..de131faf02 100644
--- a/src/agent/src/linux_abi.rs
+++ b/src/agent/src/linux_abi.rs
@@ -81,7 +81,8 @@ cfg_if! {
 // sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00
 pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM";
 
-pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu";
+pub const SYSFS_CPU_PATH: &str = "/sys/devices/system/cpu";
+pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu/online";
 
 pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block_size_bytes";
 pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe";
diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs
index 34275cc411..175304ed14 100644
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -12,6 +12,7 @@ use crate::pci;
 use crate::uevent::{Uevent, UeventMatcher};
 use crate::watcher::BindWatcher;
 use anyhow::{anyhow, Context, Result};
+use kata_types::cpu::CpuSet;
 use libc::pid_t;
 use oci::{Hook, Hooks};
 use protocols::agent::OnlineCPUMemRequest;
@@ -25,6 +26,7 @@ use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
+use std::str::FromStr;
 use std::sync::Arc;
 use std::{thread, time};
 use tokio::sync::mpsc::{channel, Receiver, Sender};
@@ -263,12 +265,12 @@ impl Sandbox {
     pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> {
         if req.nb_cpus > 0 {
             // online cpus
-            online_cpus(&self.logger, req.nb_cpus as i32)?;
+            online_cpus(&self.logger, req.nb_cpus as i32).context("online cpus")?;
         }
 
         if !req.cpu_only {
             // online memory
-            online_memory(&self.logger)?;
+            online_memory(&self.logger).context("online memory")?;
         }
 
         if req.nb_cpus == 0 {
@@ -432,23 +434,33 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res
 
 // max wait for all CPUs to online will use 50 * 100 = 5 seconds.
 const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50;
-const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100;
+const ONLINE_CPUMEM_MAX_RETRIES: i32 = 100;
 
 #[instrument]
 fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
-    let mut onlined_count: i32 = 0;
+    let mut onlined_cpu_count = onlined_cpus().context("onlined cpu count")?;
+    // for some vmms, like dragonball, they will online cpus for us
+    // so check first whether agent need to do the online operation
+    if onlined_cpu_count >= num {
+        return Ok(num);
+    }
 
     for i in 0..ONLINE_CPUMEM_MAX_RETRIES {
-        let r = online_resources(
+        // online num resources
+        online_resources(
             logger,
-            SYSFS_CPU_ONLINE_PATH,
+            SYSFS_CPU_PATH,
             r"cpu[0-9]+",
-            num - onlined_count,
-        );
+            num - onlined_cpu_count,
+        )
+        .context("online cpu resource")?;
 
-        onlined_count += r?;
-        if onlined_count == num {
-            info!(logger, "online {} CPU(s) after {} retries", num, i);
+        onlined_cpu_count = onlined_cpus().context("onlined cpu count")?;
+        if onlined_cpu_count >= num {
+            info!(
+                logger,
+                "Currently {} onlined CPU(s) after {} retries", onlined_cpu_count, i
+            );
             return Ok(num);
         }
         thread::sleep(time::Duration::from_millis(ONLINE_CPUMEM_WATI_MILLIS));
@@ -463,10 +475,18 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
 
 #[instrument]
 fn online_memory(logger: &Logger) -> Result<()> {
-    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?;
+    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)
+        .context("online memory resource")?;
     Ok(())
 }
 
+fn onlined_cpus() -> Result<i32> {
+    let content =
+        fs::read_to_string(SYSFS_CPU_ONLINE_PATH).context("read sysfs cpu online file")?;
+    let online_cpu_set = CpuSet::from_str(content.trim())?;
+    Ok(online_cpu_set.len() as i32)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/libs/protocols/protos/agent.proto b/src/libs/protocols/protos/agent.proto
index 3ad755256c..9ed7c0a049 100644
--- a/src/libs/protocols/protos/agent.proto
+++ b/src/libs/protocols/protos/agent.proto
@@ -366,7 +366,8 @@ message OnlineCPUMemRequest {
 	// resources are connected asynchronously and the agent returns immediately.
 	bool wait = 1;
 
-	// NbCpus specifies the number of CPUs that were added and the agent has to online.
+	// NbCpus specifies the number of CPUs that should be onlined in the guest.
+	// Special value 0 means agent will skip this check.
 	uint32 nb_cpus = 2;
 
 	// CpuOnly specifies whether only online CPU or not.
diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs b/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
index 7892c014eb..661e1a5e3a 100644
--- a/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
@@ -176,7 +176,7 @@ impl CpuResource {
             agent
                 .online_cpu_mem(OnlineCPUMemRequest {
                     wait: false,
-                    nb_cpus: add,
+                    nb_cpus: new,
                     cpu_only: true,
                 })
                 .await
diff --git a/src/runtime/virtcontainers/agent.go b/src/runtime/virtcontainers/agent.go
index ddf11d9ce2..8f0aca26ee 100644
--- a/src/runtime/virtcontainers/agent.go
+++ b/src/runtime/virtcontainers/agent.go
@@ -10,6 +10,7 @@ import (
 	"time"
 
 	"context"
+
 	persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api"
 	pbTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"
@@ -119,7 +120,7 @@ type agent interface {
 
 	// onlineCPUMem will online CPUs and Memory inside the Sandbox.
 	// This function should be called after hot adding vCPUs or Memory.
-	// cpus specifies the number of CPUs that were added and the agent should online
+	// cpus specifies the number of CPUs that should be onlined in the guest, and special value 0 means agent will skip this check.
 	// cpuOnly specifies that we should online cpu or online memory or both
 	onlineCPUMem(ctx context.Context, cpus uint32, cpuOnly bool) error
 
diff --git a/src/runtime/virtcontainers/pkg/agent/protocols/grpc/agent.pb.go b/src/runtime/virtcontainers/pkg/agent/protocols/grpc/agent.pb.go
index a8dd81104a..3a1b7c607f 100644
--- a/src/runtime/virtcontainers/pkg/agent/protocols/grpc/agent.pb.go
+++ b/src/runtime/virtcontainers/pkg/agent/protocols/grpc/agent.pb.go
@@ -1924,7 +1924,8 @@ type OnlineCPUMemRequest struct {
 	// If true the agent returns once all resources have been connected, otherwise all
 	// resources are connected asynchronously and the agent returns immediately.
 	Wait bool `protobuf:"varint,1,opt,name=wait,proto3" json:"wait,omitempty"`
-	// NbCpus specifies the number of CPUs that were added and the agent has to online.
+	// NbCpus specifies the number of CPUs that should be onlined in the guest.
+	// Special value 0 means agent will skip this check.
 	NbCpus uint32 `protobuf:"varint,2,opt,name=nb_cpus,json=nbCpus,proto3" json:"nb_cpus,omitempty"`
 	// CpuOnly specifies whether only online CPU or not.
 	CpuOnly              bool     `protobuf:"varint,3,opt,name=cpu_only,json=cpuOnly,proto3" json:"cpu_only,omitempty"`
diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go
index b0697fd840..9bd5f402eb 100644
--- a/src/runtime/virtcontainers/sandbox.go
+++ b/src/runtime/virtcontainers/sandbox.go
@@ -2117,9 +2117,8 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
 	s.Logger().Debugf("Request to hypervisor to update oldCPUs/newCPUs: %d/%d", oldCPUs, newCPUs)
 	// If the CPUs were increased, ask agent to online them
 	if oldCPUs < newCPUs {
-		vcpusAdded := newCPUs - oldCPUs
-		s.Logger().Debugf("Request to onlineCPUMem with %d CPUs", vcpusAdded)
-		if err := s.agent.onlineCPUMem(ctx, vcpusAdded, true); err != nil {
+		s.Logger().Debugf("Request to onlineCPUMem with %d CPUs", newCPUs)
+		if err := s.agent.onlineCPUMem(ctx, newCPUs, true); err != nil {
 			return err
 		}
 	}
diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go
index b5dec99122..72afa9581c 100644
--- a/src/runtime/virtcontainers/vm.go
+++ b/src/runtime/virtcontainers/vm.go
@@ -293,7 +293,7 @@ func (v *VM) AddMemory(ctx context.Context, numMB uint32) error {
 // OnlineCPUMemory puts the hotplugged CPU and memory online.
 func (v *VM) OnlineCPUMemory(ctx context.Context) error {
 	v.logger().Infof("online CPU %d and memory", v.cpuDelta)
-	err := v.agent.onlineCPUMem(ctx, v.cpuDelta, false)
+	err := v.agent.onlineCPUMem(ctx, v.cpu, false)
 	if err == nil {
 		v.cpuDelta = 0
 	}