Merge pull request #12729 from fidencio/topic/kata-deploy-nydus-dont-touch-data-dir-on-install

kata-deploy: nydus: never remove the data dir
Merge pull request #12725 from kata-containers/sprt/cargo-check-fix
2026-03-25 14:12:21 +00:00 · 2026-03-25 10:28:50 +00:00 · 2026-03-25 10:21:16 +00:00 · 2026-03-25 10:20:23 +00:00 · 2026-03-25 10:11:10 +00:00 · 2026-03-25 10:23:53 +01:00
25 changed files with 2273 additions and 54 deletions
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
@@ -727,7 +727,7 @@ disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
 #   - block-encrypted
 #     Plugs a block device to be encrypted in the guest.
 #
-emptydir_mode = "@DEFEMPTYDIRMODE@"
+emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
@@ -704,7 +704,7 @@ disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
 #   - block-encrypted
 #     Plugs a block device to be encrypted in the guest.
 #
-emptydir_mode = "@DEFEMPTYDIRMODE@"
+emptydir_mode = "@DEFEMPTYDIRMODE_COCO@"

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
--- a/tests/gha-run-k8s-common.sh
+++ b/tests/gha-run-k8s-common.sh
@@ -795,7 +795,7 @@ function helm_helper() {
 		disable_snapshotter_setup=false
 		for shim in ${HELM_SHIMS}; do
 			case "${shim}" in
-				qemu-tdx|qemu-snp)
+				qemu-snp)
 					disable_snapshotter_setup=true
 					break
 					;;
@@ -804,7 +804,7 @@ function helm_helper() {

 		# Safety check: Fail if EXPERIMENTAL_SETUP_SNAPSHOTTER is set when using SNP/TDX shims
 		if [[ "${disable_snapshotter_setup}" == "true" ]] && [[ -n "${HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER}" ]]; then
-			die "ERROR: HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER cannot be set when using SNP/TDX shims (qemu-snp, qemu-tdx, qemu-nvidia-gpu-snp, qemu-nvidia-gpu-tdx). snapshotter.setup must always be disabled for these shims."
+			die "ERROR: HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER cannot be set when using SNP shims (qemu-snp). snapshotter.setup must always be disabled for these shims."
 		fi

 		if [[ -n "${HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER}" ]]; then
--- a/tests/integration/kubernetes/confidential_kbs.sh
+++ b/tests/integration/kubernetes/confidential_kbs.sh
@@ -272,12 +272,29 @@ kbs_uninstall_cli() {
 	fi
 }

+# Ensure ~/.cicd/venv exists and activate it in the current shell.
+ensure_cicd_python_venv() {
+	local venv_path="${HOME}/.cicd/venv"
+	if [[ ! -f "${venv_path}/bin/activate" ]]; then
+		# NIM tests need Python 3.10 via pyenv; attestation uses system python3. Both are fine.
+		if command -v pyenv &>/dev/null; then
+			export PYENV_ROOT="${HOME}/.pyenv"
+			[[ -d "${PYENV_ROOT}/bin" ]] && export PATH="${PYENV_ROOT}/bin:${PATH}"
+			eval "$(pyenv init - bash)"
+		fi
+		mkdir -p "${HOME}/.cicd"
+		python3 -m venv "${venv_path}"
+	fi
+	# shellcheck disable=SC1091
+	source "${venv_path}/bin/activate"
+}
+
 # Ensure the sev-snp-measure utility is installed.
 #
 ensure_sev_snp_measure() {
 	command -v sev-snp-measure >/dev/null && return

-	source "${HOME}"/.cicd/venv/bin/activate
+	ensure_cicd_python_venv
 	pip install sev-snp-measure
 }

--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -176,7 +176,7 @@ function deploy_kata() {

 	# Workaround to avoid modifying the workflow yaml files
 	case "${KATA_HYPERVISOR}" in
-		qemu-nvidia-gpu-*)
+		qemu-tdx|qemu-nvidia-gpu-*)
 			USE_EXPERIMENTAL_SETUP_SNAPSHOTTER=true
 			SNAPSHOTTER="nydus"
 			EXPERIMENTAL_FORCE_GUEST_PULL=false
@@ -220,7 +220,7 @@ function deploy_kata() {
 				# deployed when the machine is configured, as on the BM machines).
 				if [[ ${ARCH} == "x86_64" ]]; then
 					case "${KATA_HYPERVISOR}" in
-						qemu-coco-dev*|qemu-nvidia-gpu-*) EXPERIMENTAL_SETUP_SNAPSHOTTER="${SNAPSHOTTER}" ;;
+						qemu-tdx|qemu-coco-dev*|qemu-nvidia-gpu-*) EXPERIMENTAL_SETUP_SNAPSHOTTER="${SNAPSHOTTER}" ;;
 						*) ;;
 					esac
 				fi
--- a/tests/integration/kubernetes/k8s-nvidia-nim.bats
+++ b/tests/integration/kubernetes/k8s-nvidia-nim.bats
@@ -70,8 +70,7 @@ NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64=$(echo -n "${NGC_API_KEY_SEALED_SECRET_
 export NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64

 setup_langchain_flow() {
-    # shellcheck disable=SC1091  # Sourcing virtual environment activation script
-    source "${HOME}"/.cicd/venv/bin/activate
+    ensure_cicd_python_venv

    pip install --upgrade pip
    [[ "$(pip show langchain 2>/dev/null | awk '/^Version:/{print $2}')" = "0.2.5" ]] || pip install langchain==0.2.5
@@ -177,13 +176,6 @@ setup_file() {

    dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq

-    export PYENV_ROOT="${HOME}/.pyenv"
-    [[ -d ${PYENV_ROOT}/bin ]] && export PATH="${PYENV_ROOT}/bin:${PATH}"
-    eval "$(pyenv init - bash)"
-
-    # shellcheck disable=SC1091  # Virtual environment will be created during test execution
-    python3 -m venv "${HOME}"/.cicd/venv
-
    setup_langchain_flow

    policy_settings_dir="$(create_tmp_policy_settings_dir "${pod_config_dir}")"
@@ -262,8 +254,6 @@ setup_file() {
    QUESTION="What is the capital of France?"
    ANSWER="The capital of France is Paris."

-    # shellcheck disable=SC1091  # Sourcing virtual environment activation script
-    source "${HOME}"/.cicd/venv/bin/activate
    # shellcheck disable=SC2031  # Variables are used in heredoc, not subshell
    cat <<EOF >"${HOME}"/.cicd/venv/langchain_nim.py
 from langchain_nvidia_ai_endpoints import ChatNVIDIA
@@ -295,8 +285,6 @@ EOF
    # shellcheck disable=SC2031  # Variables are shared via file between BATS tests
    [[ -n "${MODEL_NAME}" ]]

-    # shellcheck disable=SC1091  # Sourcing virtual environment activation script
-    source "${HOME}"/.cicd/venv/bin/activate
    cat <<EOF >"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
 import os
 from langchain.chains import ConversationalRetrievalChain, LLMChain
--- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in
+++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in
@@ -69,7 +69,14 @@ spec:
      limits:
        nvidia.com/pgpu: "1"
        cpu: "16"
-        memory: "128Gi"
+        memory: "64Gi"
+    volumeMounts:
+      - name: nim-trusted-cache
+        mountPath: /opt/nim/.cache
+  volumes:
+  - name: nim-trusted-cache
+    emptyDir:
+      sizeLimit: 64Gi
 ---
 apiVersion: v1
 kind: Secret
--- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in
+++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in
@@ -79,7 +79,14 @@ spec:
      limits:
        nvidia.com/pgpu: "1"
        cpu: "16"
-        memory: "48Gi"
+        memory: "32Gi"
+    volumeMounts:
+      - name: nim-trusted-cache
+        mountPath: /opt/nim/.cache
+  volumes:
+  - name: nim-trusted-cache
+    emptyDir:
+      sizeLimit: 40Gi
 ---
 apiVersion: v1
 kind: Secret
--- a/tests/integration/kubernetes/tests_common.sh
+++ b/tests/integration/kubernetes/tests_common.sh
@@ -150,7 +150,7 @@ install_genpolicy_drop_ins() {
 		cp "${examples_dir}/20-oci-1.2.0-drop-in.json" "${settings_d}/"
 	elif is_k3s_or_rke2; then
 		cp "${examples_dir}/20-oci-1.2.1-drop-in.json" "${settings_d}/"
-	elif is_nvidia_gpu_platform || [[ -n "${CONTAINER_ENGINE_VERSION:-}" ]]; then
+	elif is_nvidia_gpu_platform || [[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] || [[ -n "${CONTAINER_ENGINE_VERSION:-}" ]]; then
 		cp "${examples_dir}/20-oci-1.3.0-drop-in.json" "${settings_d}/"
 	fi

--- a/tools/packaging/kata-deploy/Dockerfile
+++ b/tools/packaging/kata-deploy/Dockerfile
@@ -9,7 +9,7 @@ FROM golang:1.24-alpine AS nydus-binary-downloader

 # Keep the version here aligned with "ndyus-snapshotter.version"
 # in versions.yaml
-ARG NYDUS_SNAPSHOTTER_VERSION=v0.15.10
+ARG NYDUS_SNAPSHOTTER_VERSION=v0.15.13
 ARG NYDUS_SNAPSHOTTER_REPO=https://github.com/containerd/nydus-snapshotter

 RUN \
--- a/tools/packaging/kata-deploy/binary/src/artifacts/snapshotters.rs
+++ b/tools/packaging/kata-deploy/binary/src/artifacts/snapshotters.rs
@@ -143,21 +143,30 @@ pub async fn install_nydus_snapshotter(config: &Config) -> Result<()> {
        _ => "nydus-snapshotter".to_string(),
    };

-    // Clean up existing nydus-snapshotter state to ensure fresh start with new version.
-    // This is safe across all K8s distributions (k3s, rke2, k0s, microk8s, etc.) because
-    // we only touch the nydus data directory, not containerd's internals.
-    // When containerd tries to use non-existent snapshots, it will re-pull/re-unpack.
-    let nydus_data_dir = format!("/host/var/lib/{nydus_snapshotter}");
-    info!("Cleaning up existing nydus-snapshotter state at {}", nydus_data_dir);
-
-    // Stop the service first if it exists (ignore errors if not running)
+    // Stop the service if it is currently running so we can replace the binaries safely.
    let _ = utils::host_systemctl(&["stop", &format!("{nydus_snapshotter}.service")]);

-    // Remove the data directory to clean up old snapshots with potentially incorrect labels
-    if Path::new(&nydus_data_dir).exists() {
-        info!("Removing nydus data directory: {}", nydus_data_dir);
-        fs::remove_dir_all(&nydus_data_dir).ok();
-    }
+    // The nydus data directory (/var/lib/nydus-snapshotter) is intentionally preserved
+    // across reinstalls.  Removing it would create a split-brain state: the nydus backend
+    // would start empty while containerd's BoltDB (meta.db) still holds snapshot records
+    // from the previous run.  Any subsequent image pull then fails with:
+    //
+    //   "unable to prepare extraction snapshot:
+    //    target snapshot \"sha256:...\": already exists"
+    //
+    // because the metadata layer finds the target chainID in BoltDB and returns AlreadyExists
+    // before the backend is consulted, but when Stat() delegates to the (now empty) backend
+    // it gets NotFound — tripping the unpacker's retry loop.
+    //
+    // Cleaning up containerd's meta.db before wiping the dir was attempted, but that cleanup
+    // itself requires the nydus gRPC service to be reachable (ctr snapshots rm calls the
+    // backend).  If the service was stopped or crashed before the cleanup ran, the cleanup
+    // silently fails and the split-brain state reappears.
+    //
+    // The correct invariant is simpler: meta.db and the nydus backend must always agree.
+    // Preserving the data directory across reinstalls guarantees this at zero cost.
+    // Any stale snapshots from previous workloads are naturally garbage-collected by
+    // containerd once the images that reference them are removed.

    let config_guest_pulling = "/opt/kata-artifacts/nydus-snapshotter/config-guest-pulling.toml";
    let nydus_snapshotter_service =
@@ -275,6 +284,14 @@ pub async fn uninstall_nydus_snapshotter(config: &Config) -> Result<()> {
    .ok();
    fs::remove_dir_all(format!("{}/nydus-snapshotter", config.host_install_dir)).ok();

+    // The nydus data directory (/var/lib/nydus-snapshotter) is intentionally preserved.
+    // See install_nydus_snapshotter for the full explanation: meta.db and the nydus backend
+    // must always agree, and the only way to guarantee that without complex, fragile cleanup
+    // logic is to never remove the data directory.  After uninstall, containerd is
+    // reconfigured without the nydus proxy_plugins entry and restarted, so the remaining
+    // snapshot records in meta.db are completely dormant — nothing will use them.  If nydus
+    // is reinstalled later the data directory is still present and both sides remain in sync.
+
    utils::host_systemctl(&["daemon-reload"])?;

    Ok(())
--- a/tools/packaging/kernel/build-kernel.sh
+++ b/tools/packaging/kernel/build-kernel.sh
@@ -708,8 +708,8 @@ main() {
 		build_type="dragonball-experimental"
 		if [ -n "$kernel_version" ];  then
 			kernel_major_version=$(get_major_kernel_version "${kernel_version}")
-			if [[ ${kernel_major_version} != "5.10" ]]; then
-				info "dragonball-experimental kernel patches are only tested on 5.10.x kernel now, other kernel version may cause confliction"
+			if [[ ${kernel_major_version} != "6.18" ]]; then
+				info "dragonball-experimental kernel patches are only tested on 6.18.x kernel now, other kernel version may cause confliction"
 			fi
 		fi
 	fi
--- a/tools/packaging/kernel/kata_config_version
+++ b/tools/packaging/kernel/kata_config_version
@@ -1 +1 @@
-187
+188
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0001-upcall-establish-upcall-server.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0001-upcall-establish-upcall-server.patch
@@ -0,0 +1,525 @@
+From b6466a47048621a7d7dd72cad22d267d052d01a0 Mon Sep 17 00:00:00 2001
+From: Chao Wu <chaowu@linux.alibaba.com>
+Date: Wed, 9 Nov 2022 11:38:36 +0800
+Subject: [PATCH 1/8] upcall: establish upcall server
+
+Upcall is a direct communication tool between hypervisor and guest. This
+patch introduces the server side in the upcall system.
+At the start of the upcall server, a kthread `db-vsock-srv` will be
+created. In this kthread, a vsock listener is established on a specific
+port (currently 0xDB, where DB refers to Dragonball). After the socket
+is created, it will start accepting connections from the client side.
+If the connection is established, upcall server will try to get cmd from
+the client and that cmd could determine which upcall service will handle
+the request from the client.
+
+Besides, different service needs to be registered into upcall server so
+that it could handle the request from the client. There is a
+`register_db_vsock_service` in this commit provided for every service to
+register service into service_entry list during initialization and we will
+introduce device manager service in the following commits.
+
+Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
+Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
+Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
+Signed-off-by: WangYu <WangYu@linux.alibaba.com>
+Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
+Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
+---
+ drivers/misc/Kconfig                          |   1 +
+ drivers/misc/Makefile                         |   1 +
+ drivers/misc/dragonball/Kconfig               |  21 ++
+ drivers/misc/dragonball/Makefile              |   6 +
+ drivers/misc/dragonball/upcall_srv/Kconfig    |  14 +
+ drivers/misc/dragonball/upcall_srv/Makefile   |  13 +
+ .../upcall_srv/dragonball_upcall_srv.c        | 327 ++++++++++++++++++
+ include/dragonball/upcall_srv.h               |  42 +++
+ 8 files changed, 425 insertions(+)
+ create mode 100644 drivers/misc/dragonball/Kconfig
+ create mode 100644 drivers/misc/dragonball/Makefile
+ create mode 100644 drivers/misc/dragonball/upcall_srv/Kconfig
+ create mode 100644 drivers/misc/dragonball/upcall_srv/Makefile
+ create mode 100644 drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
+ create mode 100644 include/dragonball/upcall_srv.h
+
+diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
+index b9c11f67315f0..7391c581339e1 100644
+--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
+@@ -661,4 +661,5 @@ source "drivers/misc/mchp_pci1xxxx/Kconfig"
+ source "drivers/misc/keba/Kconfig"
+ source "drivers/misc/amd-sbi/Kconfig"
+ source "drivers/misc/rp1/Kconfig"
+source "drivers/misc/dragonball/Kconfig"
+ endmenu
+diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
+index b32a2597d2467..240de7f238fa2 100644
+--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
+@@ -75,3 +75,4 @@ obj-$(CONFIG_MCHP_LAN966X_PCI)	+= lan966x-pci.o
+ obj-y				+= keba/
+ obj-y				+= amd-sbi/
+ obj-$(CONFIG_MISC_RP1)		+= rp1/
+obj-$(CONFIG_DRAGONBALL_DRIVERS)        += dragonball/
+diff --git a/drivers/misc/dragonball/Kconfig b/drivers/misc/dragonball/Kconfig
+new file mode 100644
+index 0000000000000..f81be37219081
+--- /dev/null
+++ b/drivers/misc/dragonball/Kconfig
+@@ -0,0 +1,21 @@
+#
+# Alibaba Dragonball Secure Container Runtime Drivers
+#
+
+menuconfig DRAGONBALL_DRIVERS
+	bool "Alibaba Dragonball Secure Container Runtime Drivers"
+	depends on X86_64 || ARM64
+	default n
+	help
+	  Alibaba Dragonball is a secure container runtime with an embedded micro-vmm
+	  to securely isolate container workloads.
+
+	  Say Y here to get to see options for various misc drivers to support the
+	  Alibaba Dragonball secure container runtime. This option alone does not
+	  add any kernel code.
+
+	  If unsure, say N.
+
+if DRAGONBALL_DRIVERS
+source "drivers/misc/dragonball/upcall_srv/Kconfig"
+endif # DRAGONBALL_DRIVERS
+diff --git a/drivers/misc/dragonball/Makefile b/drivers/misc/dragonball/Makefile
+new file mode 100644
+index 0000000000000..b7bd86d73ade9
+--- /dev/null
+++ b/drivers/misc/dragonball/Makefile
+@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Dragonball misc drivers
+#
+
+obj-$(CONFIG_DRAGONBALL_UPCALL_SRV)	+= upcall_srv/
+diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
+new file mode 100644
+index 0000000000000..b00bf1f8637db
+--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
+@@ -0,0 +1,14 @@
+#
+# Alibaba Dragonball Secure Container Runtime Drivers for vsock
+#
+
+config DRAGONBALL_UPCALL_SRV
+	bool "Dragonball in-kernel Virtual Sockets Server"
+	depends on VIRTIO_VSOCKETS
+	default y
+	help
+	  This configure implements an in-kernel vsock server to dispatch Dragonball
+	  requests to registered service handlers, based on the reliable Virtual
+	  Sockets communication channels between guest and host/vmm.
+
+	  If unsure, say N.
+diff --git a/drivers/misc/dragonball/upcall_srv/Makefile b/drivers/misc/dragonball/upcall_srv/Makefile
+new file mode 100644
+index 0000000000000..4102e6c7edefd
+--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/Makefile
+@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the in-kernel vsock server.
+#
+# Copyright (C) 2022 Alibaba Cloud, Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+#
+
+obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) 	+= dragonball_upcall_srv.o
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c b/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
+new file mode 100644
+index 0000000000000..bab35baa8c167
+--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
+@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
+ * Dragonball upcall server
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) "db-upcall-srv: " fmt
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vm_sockets.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <dragonball/upcall_srv.h>
+
+struct db_conn_info {
+	struct work_struct work;
+	struct socket *sock;
+};
+
+struct db_service_entry {
+	char			cmd;
+	db_vsock_svc_handler_t	handler;
+	struct list_head	list;
+};
+
+/* Protects registered command. */
+static DEFINE_MUTEX(db_service_lock);
+static LIST_HEAD(db_service_list);
+
+static struct task_struct *db_service_task;
+static unsigned int db_server_port = DB_SERVER_PORT;
+
+struct socket *db_create_vsock_listener(unsigned int port)
+{
+	struct socket *sock;
+	int ret = 0;
+
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = port,
+			.svm_cid = VMADDR_CID_ANY,
+		}
+	};
+
+	ret = sock_create_kern(&init_net, AF_VSOCK, SOCK_STREAM, 0, &sock);
+	if (ret) {
+		pr_err("Server vsock create failed, err: %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	ret = sock->ops->bind(sock, &addr.sa, sizeof(addr.svm));
+	if (ret) {
+		pr_err("Server vsock bind failed, err: %d\n", ret);
+		goto err;
+	}
+	ret = sock->ops->listen(sock, 10);
+	if (ret < 0) {
+		pr_err("Server vsock listen error: %d\n", ret);
+		goto err;
+	}
+
+	return sock;
+err:
+	sock_release(sock);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(db_create_vsock_listener);
+
+int db_vsock_sendmsg(struct socket *sock, char *buf, size_t len)
+{
+	struct kvec vec;
+	struct msghdr msgh;
+
+	vec.iov_base = buf;
+	vec.iov_len  = len;
+	memset(&msgh, 0, sizeof(msgh));
+
+	return kernel_sendmsg(sock, &msgh, &vec, 1, len);
+}
+EXPORT_SYMBOL_GPL(db_vsock_sendmsg);
+
+int db_vsock_recvmsg(struct socket *sock, char *buf, size_t len, int flags)
+{
+	struct kvec vec;
+	struct msghdr msgh;
+
+	memset(&vec, 0, sizeof(vec));
+	memset(&msgh, 0, sizeof(msgh));
+	vec.iov_base = buf;
+	vec.iov_len = len;
+
+	return kernel_recvmsg(sock, &msgh, &vec, 1, len, flags);
+}
+EXPORT_SYMBOL_GPL(db_vsock_recvmsg);
+
+static int db_vsock_recvcmd(struct socket *cli_socket, char *cmd)
+{
+	int ret;
+	char rcv;
+	long timeout;
+	struct kvec vec;
+	struct msghdr msg;
+
+	memset(&vec, 0, sizeof(vec));
+	memset(&msg, 0, sizeof(msg));
+	vec.iov_base = &rcv;
+	vec.iov_len = 1;
+
+	timeout = cli_socket->sk->sk_rcvtimeo;
+	cli_socket->sk->sk_rcvtimeo = DB_INIT_TIMEOUT * HZ;
+	ret = kernel_recvmsg(cli_socket, &msg, &vec, 1, 1, 0);
+	cli_socket->sk->sk_rcvtimeo = timeout;
+	*cmd = rcv;
+
+	return ret;
+}
+
+/*
+ * The workqueue handler for vsock work_struct.
+ *
+ * Each worker-pool bound to an actual CPU implements concurrency management
+ * by hooking into the scheduler. The worker-pool is notified whenever an
+ * active worker wakes up or sleeps and keeps track of the number of the
+ * currently runnable workers. Generally, work items are not expected to hog
+ * a CPU and consume many cycles. That means maintaining just enough concurrency
+ * to prevent work processing from stalling should be optimal.
+ *
+ * So it's OK to sleep in a workqueue handler, it won't cause too many worker
+ * threads.
+ */
+static void db_conn_service(struct work_struct *work)
+{
+	struct db_conn_info *conn_info =
+		container_of(work, struct db_conn_info, work);
+	struct db_service_entry *service_entry;
+	int len, ret = -1;
+	char cmd;
+
+	len = db_vsock_recvcmd(conn_info->sock, &cmd);
+	if (len <= 0)
+		goto recv_failed;
+
+	mutex_lock(&db_service_lock);
+	list_for_each_entry(service_entry, &db_service_list, list) {
+		if (cmd == service_entry->cmd) {
+			ret = service_entry->handler(conn_info->sock);
+			break;
+		}
+	}
+	mutex_unlock(&db_service_lock);
+
+recv_failed:
+	if (ret) {
+		sock_release(conn_info->sock);
+		pr_info("Client connection closed, error code: %d\n", ret);
+	}
+	kfree(conn_info);
+}
+
+static int db_create_cli_conn(struct socket *sock)
+{
+	struct db_conn_info *conn;
+
+	conn = kmalloc(sizeof(*conn), GFP_KERNEL);
+	if (!conn)
+		return -ENOMEM;
+
+	conn->sock = sock;
+	INIT_WORK(&conn->work, db_conn_service);
+	schedule_work(&conn->work);
+
+	return 0;
+}
+
+static int db_vsock_server(void *data)
+{
+	struct socket *sock;
+	int err;
+
+	sock = db_create_vsock_listener(db_server_port);
+	if (IS_ERR(sock)) {
+		err = PTR_ERR(sock);
+		pr_err("Init server err: %d\n", err);
+		return err;
+	}
+
+	while (!kthread_should_stop()) {
+		struct socket *conn;
+		struct proto_accept_arg arg;
+
+		conn = sock_alloc();
+		if (!conn)
+			return -ENOMEM;
+
+		conn->type = sock->type;
+		conn->ops  = sock->ops;
+
+		/* flags = 0: no accept flags set; kern = true: kernel-internal socket */
+		arg.flags = 0;
+		arg.kern = true;
+
+		err = sock->ops->accept(sock, conn, &arg);
+		if (err < 0) {
+			pr_err("Server accept err: %d\n", err);
+			sock_release(conn);
+			continue;
+		}
+
+		err = db_create_cli_conn(conn);
+		if (err)
+			pr_err("Create client connetion err: %d\n", err);
+	}
+
+	return 0;
+}
+
+static int db_create_service(void)
+{
+	struct task_struct *service;
+	int rc = 0;
+
+	service = kthread_create(db_vsock_server, NULL, "db-vsock-srv");
+	if (IS_ERR(service)) {
+		rc = PTR_ERR(service);
+		pr_err("Server task create failed, err: %d\n", rc);
+	} else {
+		db_service_task = service;
+		wake_up_process(service);
+	}
+	return rc;
+}
+
+static int db_vsock_srv_cmdline_set(const char *device,
+				    const struct kernel_param *kp)
+{
+	unsigned int port = 0;
+	int processed, consumed = 0;
+
+	/* Get "@<port>" */
+	processed = sscanf(device, "@%u%n", &port, &consumed);
+	if (processed < 1 || device[consumed] || port == 0 || port > 1024) {
+		pr_err("Using @<port> format and port range (0, 1024].\n");
+		return -EINVAL;
+	}
+
+	db_server_port = port;
+	return 0;
+}
+
+static const struct kernel_param_ops db_vsock_srv_cmdline_param_ops = {
+	.set = db_vsock_srv_cmdline_set,
+};
+
+device_param_cb(port, &db_vsock_srv_cmdline_param_ops, NULL, 0400);
+
+int register_db_vsock_service(const char cmd, db_vsock_svc_handler_t handler)
+{
+	int rc = -EEXIST;
+	struct db_service_entry *service_entry;
+
+	mutex_lock(&db_service_lock);
+	list_for_each_entry(service_entry, &db_service_list, list) {
+		if (cmd == service_entry->cmd) {
+			rc = -EEXIST;
+			goto out;
+		}
+	}
+
+	service_entry = kzalloc(sizeof(*service_entry), GFP_KERNEL);
+	if (!service_entry) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	service_entry->cmd = cmd;
+	service_entry->handler = handler;
+	list_add_tail(&service_entry->list, &db_service_list);
+	rc = 0;
+out:
+	mutex_unlock(&db_service_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(register_db_vsock_service);
+
+int unregister_db_vsock_service(const char cmd)
+{
+	int rc = -EEXIST;
+	struct db_service_entry *service_entry, *n;
+
+	mutex_lock(&db_service_lock);
+	list_for_each_entry_safe(service_entry, n, &db_service_list, list) {
+		if (cmd == service_entry->cmd) {
+			list_del(&service_entry->list);
+			rc = 0;
+			break;
+		}
+	}
+	mutex_unlock(&db_service_lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(unregister_db_vsock_service);
+
+static int __init db_vsock_srv_init(void)
+{
+	return db_create_service();
+}
+
+late_initcall(db_vsock_srv_init);
+
+MODULE_AUTHOR("Alibaba, Inc.");
+MODULE_DESCRIPTION("Dragonball vsock server");
+MODULE_LICENSE("GPL v2");
+diff --git a/include/dragonball/upcall_srv.h b/include/dragonball/upcall_srv.h
+new file mode 100644
+index 0000000000000..1c733982cc30d
+--- /dev/null
+++ b/include/dragonball/upcall_srv.h
+@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * upcall_srv.h  Virtual Sockets Server for Dragonball
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _DB_UPCALL_SRV_H
+#define _DB_UPCALL_SRV_H
+
+#include <linux/workqueue.h>
+#include <linux/net.h>
+
+/* Vsock port to listen for incoming connections. */
+#define DB_SERVER_PORT				0xDB
+#define DB_RECVBUF_SIZE			0x400
+#define DB_INIT_TIMEOUT			10
+
+/*
+ * Vsock service handler to handle new incoming connections.
+ *
+ * Return:
+ * 0: on success and the callback takes ownership of the sock.
+ * !0: on failure and the callback should keep the sock as is.
+ */
+typedef int (*db_vsock_svc_handler_t) (struct socket *sock);
+
+extern int register_db_vsock_service(const char cmd,
+				     db_vsock_svc_handler_t handler);
+extern int unregister_db_vsock_service(const char cmd);
+
+extern struct socket *db_create_vsock_listener(unsigned int port);
+extern int db_vsock_sendmsg(struct socket *sock, char *buf, size_t len);
+extern int db_vsock_recvmsg(struct socket *sock, char *buf, size_t len,
+			    int flags);
+
+#endif /* _DB_UPCALL_SRV_H */
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0002-upcall-introduce-device-manager-upcall-service.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0002-upcall-introduce-device-manager-upcall-service.patch
@@ -0,0 +1,330 @@
+From 81b293100f31d303cd0f611063cf8b4f167cb4f7 Mon Sep 17 00:00:00 2001
+From: Chao Wu <chaowu@linux.alibaba.com>
+Date: Mon, 21 Nov 2022 19:19:26 +0800
+Subject: [PATCH 2/8] upcall: introduce device manager upcall service
+
+Different services are registered into upcall server to handle the
+request from the client side. This commit introduces device manager
+upcall service and when new message gets into upcall server, cmd `d` is
+used for identifying the device manager service.
+
+After a request is sent to device manager service, db_devmgr_handler
+will start handling the request. A kthread `db_devmgr_server` will be
+created and it will send CONNECT message to the client side to notify
+the client to start sending messages for device management operations.
+`db_devmgr_process` will be used for determining which device operations
+will be triggered through msg_type. `get_action` will find out the
+action for dealing with the operation and `action` fn will execute the
+actual device management operation in the device manager service.
+
+Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
+Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
+Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
+Signed-off-by: WangYu <WangYu@linux.alibaba.com>
+Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
+---
+ drivers/misc/dragonball/upcall_srv/Kconfig    |  12 +
+ drivers/misc/dragonball/upcall_srv/Makefile   |   1 +
+ .../upcall_srv/dragonball_device_manager.c    | 235 ++++++++++++++++++
+ include/dragonball/device_manager.h           |  18 ++
+ 4 files changed, 266 insertions(+)
+ create mode 100644 drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+ create mode 100644 include/dragonball/device_manager.h
+
+diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
+index b00bf1f8637db..6554a9741c00d 100644
+--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
+@@ -12,3 +12,15 @@ config DRAGONBALL_UPCALL_SRV
+ 	  Sockets communication channels between guest and host/vmm.
+ 
+ 	  If unsure, say N.
+
+config DRAGONBALL_DEVICE_MANAGER
+	bool "Vsock Service to Handle Dragonball Device Management Requests"
+	depends on DRAGONBALL_UPCALL_SRV
+	depends on VIRTIO_VSOCKETS
+	default y
+	help
+	  This configure implements a vsock service to handle Dragonball device
+	  management requests, such as getting device information, hot-plugging
+	  devices etc.
+
+	  If unsure, say N.
+diff --git a/drivers/misc/dragonball/upcall_srv/Makefile b/drivers/misc/dragonball/upcall_srv/Makefile
+index 4102e6c7edefd..409c0c11e2e66 100644
+--- a/drivers/misc/dragonball/upcall_srv/Makefile
+++ b/drivers/misc/dragonball/upcall_srv/Makefile
+@@ -11,3 +11,4 @@
+ #
+ 
+ obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) 	+= dragonball_upcall_srv.o
+obj-$(CONFIG_DRAGONBALL_DEVICE_MANAGER)	+= dragonball_device_manager.o
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+new file mode 100644
+index 0000000000000..ebcb6ef742855
+--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+ * vsock service for device management.
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) "db-dev-mgr: " fmt
+
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/virtio_mmio.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>
+#include <asm/cpu.h>
+#include <dragonball/upcall_srv.h>
+#include <dragonball/device_manager.h>
+#ifdef CONFIG_ARM64
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+#endif
+#include <linux/percpu.h>
+#include <linux/device.h>
+#include <asm/numa.h>
+
+/*
+ * Following designs are adopted to simplify implementation:
+ * 1) fix size messages with padding to ease receiving logic.
+ * 2) binary encoding instead of string encoding because it's on the same host.
+ * 3) synchronous communication in ping-pong mode, one in-fly request at most.
+ * 4) do not support module unloading
+ */
+
+/* These definitions are synchronized with dragonball */
+#define DEV_MGR_MSG_SIZE			0x400
+#define DEVMGR_CMD_BYTE			'd'
+#define DEVMGR_MAGIC_VERSION		0x444D0100 /* 'DM' + Version 1.0 */
+#define SHARED_IRQ_NO			5
+
+/* Type of request and reply messages. */
+enum devmgr_msg_type {
+	CONNECT			= 0x00000000,
+	ADD_CPU			= 0x00000001,
+	DEL_CPU			= 0x00000002,
+	ADD_MEM			= 0x00000003,
+	DEL_MEM			= 0x00000004,
+	ADD_MMIO		= 0x00000005,
+	DEL_MMIO		= 0x00000006,
+	ADD_PCI			= 0x00000007,
+	DEL_PCI			= 0x00000008,
+};
+
+struct devmgr_msg_header {
+	/* magic version for identifying upcall */
+	uint32_t	magic_version;
+	/* size of the upcall message */
+	uint32_t	msg_size;
+	/* type for the message to identify its usage */
+	uint32_t	msg_type;
+	/* flag for extra information */
+	uint32_t	msg_flags;
+};
+
+struct devmgr_req {
+	struct devmgr_msg_header msg_header;
+	union {
+		char	pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+	} msg_load;
+};
+
+struct devmgr_reply {
+	struct devmgr_msg_header msg_header;
+	/*
+	 * if ret is 0, it means the operation is successful.
+	 * if ret is not 0, return value will be error code.
+	 */
+	int32_t ret;
+	union {
+		char	pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+	} msg_load;
+};
+
+struct task_res {
+	struct task_struct	*task;
+	struct socket		*sock;
+	struct devmgr_req	req;
+	struct devmgr_reply	reply;
+};
+
+typedef int (*action_route_t) (struct devmgr_req *req,
+			       struct devmgr_reply *rep);
+
+static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
+			     uint32_t msg_type, uint32_t msg_flags)
+{
+	msg->magic_version = DEVMGR_MAGIC_VERSION;
+	msg->msg_size      = msg_size;
+	msg->msg_type      = msg_type;
+	msg->msg_flags     = msg_flags;
+}
+
+static struct {
+	enum devmgr_msg_type cmd;
+	action_route_t fn;
+} opt_map[] = {
+};
+
+static action_route_t get_action(struct devmgr_req *req)
+{
+	int i;
+	action_route_t action = NULL;
+	int size_opt = ARRAY_SIZE(opt_map);
+
+	for (i = 0; i < size_opt; i++) {
+		if (opt_map[i].cmd == req->msg_header.msg_type) {
+			action = opt_map[i].fn;
+			break;
+		}
+	}
+	return action;
+}
+
+static void db_devmgr_process(struct devmgr_req *req,
+			      struct devmgr_reply *rep)
+{
+	int err;
+	action_route_t action;
+	struct devmgr_msg_header *req_mh = &req->msg_header;
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+
+	if (req_mh->magic_version != DEVMGR_MAGIC_VERSION) {
+		rep->ret = -EINVAL;	/* reply buffer is reused: never echo a stale ret */
+		_fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+		return;
+	}
+	/* Route on msg_type; a type without an opt_map entry is rejected below. */
+	action = get_action(req);
+	if (action == NULL) {
+		pr_err("db_devmgr_process : Not found valid command");
+		rep->ret = -1;
+		_fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+		return;
+	}
+	/* A successful action is expected to fill *rep itself; only errors are patched here. */
+	err = action(req, rep);
+	if (err) {
+		pr_err("db_devmgr_process : Command run failed, err: %d", err);
+		rep->ret = err;
+		_fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+	}
+}
+
+static int db_devmgr_server(void *data)
+{
+	struct task_res *res = (struct task_res *)data;
+	struct devmgr_msg_header *rep_mh = &res->reply.msg_header;
+	int len;
+
+	_fill_msg_header(rep_mh, 0, CONNECT, 0);
+	len = db_vsock_sendmsg(res->sock, (char *)&res->reply, DEV_MGR_MSG_SIZE);
+	if (len <= 0) {
+		pr_err("db_devmgr_server : Server send message failed, err: %d", len);
+		sock_release(res->sock);
+		kfree(res);
+		return len;
+	}
+
+	while (!kthread_should_stop()) {
+		len = db_vsock_recvmsg(res->sock, (char *)&res->req,
+				       DEV_MGR_MSG_SIZE, 0);
+		if (len <= 0)
+			break;
+
+		/* The result(OK or Error) will fill into res->reply field */
+		db_devmgr_process(&res->req, &res->reply);
+
+		len = db_vsock_sendmsg(res->sock, (char *)&res->reply,
+				       DEV_MGR_MSG_SIZE);
+		if (len <= 0)
+			break;
+	}
+
+	/* TODO: check who shutdown the socket, receiving or sending. */
+	sock_release(res->sock);
+	kfree(res);
+	return 0;
+}
+
+static int db_devmgr_handler(struct socket *sock)
+{
+	struct task_res *res;
+	struct task_struct *conn_task;
+
+	/* TODO: ensure singleton, only one server exists */
+	res = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	res->sock = sock;
+	conn_task = kthread_create(db_devmgr_server, res, "db_dev_mgr");
+	if (IS_ERR(conn_task)) {
+		pr_err("db_devmgr_handler : Client process thread create failed, err: %d",
+		       (int)PTR_ERR(conn_task));
+		goto failed;
+	} else {
+		res->task = conn_task;
+		wake_up_process(conn_task);
+	}
+
+	return 0;
+failed:
+	kfree(res);
+	return PTR_ERR(conn_task);
+}
+
+static int __init db_device_manager_init(void)
+{
+	return register_db_vsock_service(DEVMGR_CMD_BYTE, db_devmgr_handler);
+}
+
+late_initcall(db_device_manager_init);
+
+MODULE_AUTHOR("Alibaba, Inc.");
+MODULE_DESCRIPTION("Dragonball Device Manager");
+MODULE_LICENSE("GPL v2");
+diff --git a/include/dragonball/device_manager.h b/include/dragonball/device_manager.h
+new file mode 100644
+index 0000000000000..a1713e9f026d1
+--- /dev/null
+++ b/include/dragonball/device_manager.h
+@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * device_manager.h  Device Manager for Dragonball
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _DB_DEVICE_MANAGER_H
+#define _DB_DEVICE_MANAGER_H
+
+#include <linux/device.h>
+
+#endif /* _DB_DEVICE_MANAGER_H */
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0003-upcall-add-cpu-hotplug-hot-unplug-into-device-manage.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0003-upcall-add-cpu-hotplug-hot-unplug-into-device-manage.patch
@@ -0,0 +1,326 @@
+From f2afaacf8d2d5dbf597fc4ffed70e8804a26b130 Mon Sep 17 00:00:00 2001
+From: Chao Wu <chaowu@linux.alibaba.com>
+Date: Mon, 21 Nov 2022 19:44:50 +0800
+Subject: [PATCH 3/8] upcall: add cpu hotplug/hot-unplug into device manager
+ service
+
+Add cpu hotplug and hot-unplug support into device manager. In the
+`devmgr_req` message, `msg_type` ADD_CPU in `msg_header` will trigger
+`add_cpu_dev` action and DEL_CPU will trigger `del_cpu_dev` action, and
+we use `apic_ids` and `count` delivered in `cpu_dev_info` to notify
+which and how many cpus will be hotplugged / hot-unplugged.
+
+`add_cpu_dev` and `del_cpu_dev` will eventually trigger `add_cpu_upcall`
+and `del_cpu_upcall` to trigger the cpu hotplug / hot-unplug process in
+the kernel. After the cpu hotplug / hot-unplug process,
+`cpu_event_notification` will generate device manager reply to the
+client side.
+
+Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
+Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
+Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
+Signed-off-by: WangYu <WangYu@linux.alibaba.com>
+Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
+Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
+---
+ drivers/misc/dragonball/upcall_srv/Kconfig    |  11 +
+ .../upcall_srv/dragonball_device_manager.c    | 236 ++++++++++++++++++
+ 2 files changed, 247 insertions(+)
+
+diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
+index 6554a9741c00d..b237882a29288 100644
+--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
+@@ -24,3 +24,14 @@ config DRAGONBALL_DEVICE_MANAGER
+ 	  devices etc.
+ 
+ 	  If unsure, say N.
+
+config DRAGONBALL_HOTPLUG_CPU
+	bool "CPU hotplug/hotunplug support"
+	depends on DRAGONBALL_DEVICE_MANAGER
+	default y
+	help
+	  This configure implements a vCPU hotplug/hotunplug support, vmm
+	  should send hotplug request by vsock which follow special data
+	  structure with command and parameter to hot-plug a vCPU.
+
+	  If unsure, say N.
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+index ebcb6ef742855..16c6b937c5536 100644
+--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+@@ -23,6 +23,10 @@
+ #include <linux/cpumask.h>
+ #include <linux/cpuhotplug.h>
+ #include <asm/cpu.h>
+#ifdef CONFIG_X86_64
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#endif
+ #include <dragonball/upcall_srv.h>
+ #include <dragonball/device_manager.h>
+ #ifdef CONFIG_ARM64
+@@ -75,9 +79,20 @@ struct devmgr_req {
+ 	struct devmgr_msg_header msg_header;
+ 	union {
+ 		char	pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+		struct {
+			uint8_t count;
+			uint8_t apic_ver;
+			uint8_t apic_ids[256];
+		} cpu_dev_info;
+#endif
+ 	} msg_load;
+ };
+ 
+struct cpu_dev_reply_info {
+	uint32_t apic_index;
+};
+
+ struct devmgr_reply {
+ 	struct devmgr_msg_header msg_header;
+ 	/*
+@@ -87,6 +102,9 @@ struct devmgr_reply {
+ 	int32_t ret;
+ 	union {
+ 		char	pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+	struct cpu_dev_reply_info cpu_dev_info;
+#endif
+ 	} msg_load;
+ };
+ 
+@@ -109,10 +127,228 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
+ 	msg->msg_flags     = msg_flags;
+ }
+ 
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+static int get_cpu_id(int apic_id)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (cpu_physical_id(i) == apic_id)
+			return i;
+	}
+	return -1;
+}
+
+static int lookup_cpuid(int apic_id)
+{
+	int i;
+
+	/* CPU# to APICID mapping is persistent once it is established */
+	for_each_possible_cpu(i) {
+		if (cpuid_to_apicid[i] == apic_id)
+			return i;
+	}
+	return -ENODEV;
+}
+
+/**
+ * Return the first failed hotplug index of the apic_ids to dragonball.
+ * If it is not equal to the count of all hotplug needed vcpus,
+ * we will rollback the vcpus from apic_ids[0] to apic_ids[i-1] in dragonball.
+ */
+static void cpu_event_notification(
+	uint8_t apic_ids_index,
+	int ret,
+	uint32_t action_type,
+	struct devmgr_reply *rep)
+{
+	pr_info("cpu event notification: apic ids index %d", apic_ids_index);
+	rep->msg_load.cpu_dev_info.apic_index = apic_ids_index;
+	rep->ret = ret;
+	_fill_msg_header(&rep->msg_header,
+	sizeof(struct cpu_dev_reply_info), action_type, 0);
+}
+#endif
+
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
+{
+	int cpu_id, node_id;
+	int ret;
+
+	pr_info("adding vcpu apic_id %d", apic_id);
+
+	/**
+	 * Get the mutex lock for hotplug and cpu update and cpu write lock.
+	 * So that other threads won't influence the hotplug process.
+	 */
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpus_write_lock();
+
+	cpu_id = lookup_cpuid(apic_id);
+	if (cpu_id < 0) {
+		pr_err("cpu (apic id %d) cannot be added, generic processor info failed", apic_id);
+		ret = -EINVAL;
+		goto rollback_generic_cpu;
+	}
+
+	set_bit(apic_id, phys_cpu_present_map);
+	early_per_cpu(x86_cpu_to_apicid, cpu_id) = apic_id;
+	set_cpu_present(cpu_id, true);
+
+	/* update numa mapping for hot-plugged cpus. */
+	node_id = numa_cpu_node(cpu_id);
+	if (node_id != NUMA_NO_NODE)
+		numa_set_node(cpu_id, node_id);
+
+	ret = arch_register_cpu(cpu_id);
+	if (ret) {
+		pr_err("cpu %d cannot be added, register cpu failed %d", cpu_id, ret);
+		goto rollback_register_cpu;
+	}
+
+	cpus_write_unlock();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+
+	ret = add_cpu(cpu_id);
+	if (ret) {
+		pr_err("cpu %d cannot be added, cpu up failed: %d", cpu_id, ret);
+		goto rollback_cpu_up;
+	}
+	return ret;
+
+rollback_cpu_up:
+	arch_unregister_cpu(cpu_id);
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+	return ret;
+
+rollback_register_cpu:
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+rollback_generic_cpu:
+	cpus_write_unlock();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+	return ret;
+}
+
+static int del_cpu_upcall(int apic_id)
+{
+	int cpu_id = get_cpu_id(apic_id);
+	int ret;
+
+	if (cpu_id == 0) {
+		pr_err("cannot del bootstrap processor.");
+		return -EINVAL;
+	}
+	pr_info("deleting vcpu %d", cpu_id);
+	ret = remove_cpu(cpu_id);
+	if (ret) {
+		pr_err("del vcpu failed, err: %d", ret);
+		return ret;
+	}
+
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpus_write_lock();
+
+	arch_unregister_cpu(cpu_id);
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+
+	cpus_write_unlock();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+
+	return ret;
+}
+
+/* ADD_CPU handler: hot-add every APIC id listed in the request, in order. */
+static int add_cpu_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret = 0;	/* count == 0 skips both loops; ret must still be defined */
+	uint8_t i;
+	int apic_id;
+
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+	uint8_t apic_ver = req->msg_load.cpu_dev_info.apic_ver;
+	uint8_t *apic_ids = req->msg_load.cpu_dev_info.apic_ids;
+
+	pr_info("add vcpu number: %d", count);
+
+	for (i = 0; i < count; ++i) {
+		apic_id = apic_ids[i];
+		if (get_cpu_id(apic_id) != -1) {
+			pr_err("cpu cannot be added: apci_id %d is already been used.", apic_id);
+			return -EINVAL;
+		}
+	}
+
+	for (i = 0; i < count; ++i) {
+		apic_id = apic_ids[i];
+		ret = add_cpu_upcall(apic_id, apic_ver);
+		if (ret != 0)
+			break;
+	}
+
+	if (!ret)
+		cpu_event_notification(i, ret, ADD_CPU, rep);
+	return ret;
+}
+
+/* DEL_CPU handler: remove every CPU whose APIC id is listed in the request. */
+static int del_cpu_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret = 0;	/* count == 0 skips both loops; ret must still be defined */
+	uint8_t i;
+	int cpu_id;
+
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+	uint8_t *apic_ids = req->msg_load.cpu_dev_info.apic_ids;
+
+	pr_info("del vcpu number : %d", count);
+
+	if (count >= num_online_cpus()) {
+		pr_err("cpu del parameter check error: cannot remove all vcpus");
+		cpu_event_notification(0, -EINVAL, DEL_CPU, rep);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < count; ++i) {
+		cpu_id = get_cpu_id(apic_ids[i]);
+		if (cpu_id < 0 || !cpu_possible(cpu_id)) {	/* -1: unknown APIC id */
+			pr_err("cpu %d cannot be deleted: cpu not possible", cpu_id);
+			ret = -EINVAL;
+			cpu_event_notification(0, ret, DEL_CPU, rep);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < count; ++i) {
+		ret = del_cpu_upcall(apic_ids[i]);
+		if (ret != 0)
+			break;
+	}
+
+	if (!ret)
+		cpu_event_notification(i, ret, DEL_CPU, rep);
+	return ret;
+}
+#endif
+
+ static struct {
+ 	enum devmgr_msg_type cmd;
+ 	action_route_t fn;
+ } opt_map[] = {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+	{ADD_CPU, add_cpu_dev},
+	{DEL_CPU, del_cpu_dev},
+#endif
+ };
+ 
+ static action_route_t get_action(struct devmgr_req *req)
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0004-upcall-add-virtio-mmio-hotplug-hot-unplug-into-devic.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0004-upcall-add-virtio-mmio-hotplug-hot-unplug-into-devic.patch
@@ -0,0 +1,419 @@
+From 5109240e29df451e1d6f4474620ef6a2f24349b7 Mon Sep 17 00:00:00 2001
+From: Chao Wu <chaowu@linux.alibaba.com>
+Date: Wed, 23 Nov 2022 19:23:47 +0800
+Subject: [PATCH 4/8] upcall: add virtio-mmio hotplug/hot-unplug into device
+ manager service
+
+Add virtio-mmio hotplug/hot-unplug support into device manager. In the
+`devmgr_req` message, `msg_type` ADD_MMIO in `msg_header` will trigger
+`add_mmio_dev` action and DEL_MMIO will trigger `del_mmio_dev` action,
+and we use `mmio_base`, `mmio_size` and `mmio_irq` delivered in
+`add_mmio_dev` to notify how to hotplug the virtio-mmio device
+
+Also `virtio_mmio_add_device` and `virtio_mmio_del_device` are
+introduced under /drivers/virtio/virtio_mmio.c, and we extract
+`vm_add_device` from `vm_cmdline_set` to help hotplug virtio-mmio
+device.
+
+Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
+Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
+Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
+Signed-off-by: WangYu <WangYu@linux.alibaba.com>
+Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
+---
+ drivers/misc/dragonball/upcall_srv/Kconfig    |  12 ++
+ .../upcall_srv/dragonball_device_manager.c    | 112 ++++++++++++++
+ drivers/virtio/Kconfig                        |  14 ++
+ drivers/virtio/virtio_mmio.c                  | 140 +++++++++++++++---
+ include/dragonball/device_manager.h           |   5 +
+ 5 files changed, 260 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
+index b237882a29288..fc83f03c2edd2 100644
+--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
+@@ -25,6 +25,18 @@ config DRAGONBALL_DEVICE_MANAGER
+ 
+ 	  If unsure, say N.
+ 
+config DRAGONBALL_HOTPLUG_VIRTIO_MMIO
+	bool "Virtio-MMIO device hotplug/hotunplug support"
+	depends on DRAGONBALL_DEVICE_MANAGER
+	default y
+	help
+	  This configure implements a Virtio-MMIO device hotplug/hotunplug
+	  support, vmm should send hotplug request by vsock which follow
+	  special data structure with command and parameter to hot-plug
+	  an MMIO device.
+
+	  If unsure, say N.
+
+ config DRAGONBALL_HOTPLUG_CPU
+ 	bool "CPU hotplug/hotunplug support"
+ 	depends on DRAGONBALL_DEVICE_MANAGER
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+index 16c6b937c5536..f591841715c3b 100644
+--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+@@ -79,6 +79,13 @@ struct devmgr_req {
+ 	struct devmgr_msg_header msg_header;
+ 	union {
+ 		char	pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+		struct {
+			uint64_t mmio_base;
+			uint64_t mmio_size;
+			uint32_t mmio_irq;
+		} add_mmio_dev;
+#endif
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ 		struct {
+ 			uint8_t count;
+@@ -102,6 +109,10 @@ struct devmgr_reply {
+ 	int32_t ret;
+ 	union {
+ 		char	pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+		struct {
+		} add_mmio_dev;
+#endif
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ 	struct cpu_dev_reply_info cpu_dev_info;
+ #endif
+@@ -118,6 +129,62 @@ struct task_res {
+ typedef int (*action_route_t) (struct devmgr_req *req,
+ 			       struct devmgr_reply *rep);
+ 
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+#ifdef CONFIG_ARM64
+static uint32_t get_device_virq(uint32_t pin)
+{
+	uint32_t virq;
+	struct device_node *node;
+	struct irq_fwspec dummy_fwspec = {
+		.param_count = 3,
+		.param = {0, 0, IRQ_TYPE_EDGE_RISING}
+	};
+
+	node = of_find_node_by_name(NULL, "intc");
+	if (!node) {
+		pr_err("interrupt controller device node not found.");
+		return 0;
+	}
+	dummy_fwspec.param[1] = pin;
+	dummy_fwspec.fwnode = of_node_to_fwnode(node);
+	virq = irq_create_fwspec_mapping(&dummy_fwspec);
+	of_node_put(node);
+	return virq;
+}
+#elif defined(CONFIG_X86_64)
+static inline uint32_t get_device_virq(uint32_t irq)
+{
+	return irq;
+}
+#endif
+
+static int get_dev_resource(struct devmgr_req *req, struct resource *res)
+{
+	uint64_t base = req->msg_load.add_mmio_dev.mmio_base;
+	uint64_t size = req->msg_load.add_mmio_dev.mmio_size;
+	uint32_t irq  = req->msg_load.add_mmio_dev.mmio_irq;
+	uint32_t virq;
+
+	if (req->msg_header.msg_size != sizeof(req->msg_load.add_mmio_dev))
+		return -EINVAL;
+
+	res[0].flags = IORESOURCE_MEM;
+	res[0].start = base;
+	res[0].end   = base + size - 1;
+	res[1].flags = IORESOURCE_IRQ;
+	virq = get_device_virq(irq);
+	if (!virq)
+		return -EINVAL;
+	res[1].start = res[1].end = virq;
+
+	/* detect the irq sharing mode */
+	if (irq == SHARED_IRQ_NO)
+		res[1].flags |= IORESOURCE_IRQ_SHAREABLE;
+
+	return 0;
+}
+#endif
+
+ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
+ 			     uint32_t msg_type, uint32_t msg_flags)
+ {
+@@ -170,6 +237,47 @@ static void cpu_event_notification(
+ }
+ #endif
+ 
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+static int add_mmio_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret;
+	struct resource res[2] = {};
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+
+	ret = get_dev_resource(req, res);
+	if (ret)
+		return ret;
+
+	ret = virtio_mmio_add_device(res, ARRAY_SIZE(res));
+	if (!ret) {
+		rep->ret = ret;
+		_fill_msg_header(rep_mh, 0, ADD_MMIO, 0);
+	}
+	return ret;
+}
+
+static int del_mmio_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret;
+	struct resource res[2] = {};
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+
+	ret = get_dev_resource(req, res);
+	if (ret)
+		return ret;
+
+	ret = virtio_mmio_del_device(res, ARRAY_SIZE(res));
+	if (!ret) {
+		rep->ret = ret;
+		_fill_msg_header(rep_mh, 0, DEL_MMIO, 0);
+	}
+	return ret;
+}
+#endif
+
+
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+ static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
+ {
+@@ -345,6 +453,10 @@ static struct {
+ 	enum devmgr_msg_type cmd;
+ 	action_route_t fn;
+ } opt_map[] = {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+	{ADD_MMIO, add_mmio_dev},
+	{DEL_MMIO, del_mmio_dev},
+#endif
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+ 	{ADD_CPU, add_cpu_dev},
+ 	{DEL_CPU, del_cpu_dev},
+diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
+index 6db5235a7693d..eaf9d1bea38fc 100644
+--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
+@@ -31,6 +31,20 @@ menuconfig VIRTIO_MENU
+ 	bool "Virtio drivers"
+ 	default y
+ 
+config VIRTIO_MMIO_DRAGONBALL
+	bool "Enable features for Dragonball virtio MMIO devices"
+	default n
+	depends on VIRTIO_MMIO
+	depends on X86_64 || ARM64
+	select X86_PLATFORM_MSI
+	select VIRTIO_MMIO_MSI
+	help
+	  The Dragonball VMM implements several optimizations for MMIO virtio
+	  devices. This option enables support of those optimization features:
+	  - virtio-mmio hotplug through upcall
+
+	  If unsure, say N
+
+ if VIRTIO_MENU
+ 
+ config VIRTIO_HARDEN_NOTIFICATION
+diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
+index b152a1eca05ad..a8a7251e8c030 100644
+--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
+@@ -669,16 +669,42 @@ static struct device vm_cmdline_parent = {
+ static int vm_cmdline_parent_registered;
+ static int vm_cmdline_id;
+ 
+static int vm_add_device(struct resource *resources, size_t res_size)
+{
+       int err;
+       struct platform_device *pdev;
+
+       if (!vm_cmdline_parent_registered) {
+               err = device_register(&vm_cmdline_parent);
+               if (err) {
+                       pr_err("Failed to register parent device!\n");
+		       put_device(&vm_cmdline_parent);
+                       return err;
+               }
+               vm_cmdline_parent_registered = 1;
+       }
+
+       pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
+                      vm_cmdline_id,
+                      (unsigned long long)resources[0].start,
+                      (unsigned long long)resources[0].end,
+                  (int)resources[1].start);
+
+       pdev = platform_device_register_resndata(&vm_cmdline_parent,
+                       "virtio-mmio", vm_cmdline_id++,
+                       resources, res_size, NULL, 0);
+
+       return PTR_ERR_OR_ZERO(pdev);
+}
+
+ static int vm_cmdline_set(const char *device,
+ 		const struct kernel_param *kp)
+ {
+-	int err;
+ 	struct resource resources[2] = {};
+ 	char *str;
+ 	long long base, size;
+ 	unsigned int irq;
+ 	int processed, consumed = 0;
+-	struct platform_device *pdev;
+ 
+ 	/* Consume "size" part of the command line parameter */
+ 	size = memparse(device, &str);
+@@ -703,27 +729,7 @@ static int vm_cmdline_set(const char *device,
+ 	resources[1].flags = IORESOURCE_IRQ;
+ 	resources[1].start = resources[1].end = irq;
+ 
+-	if (!vm_cmdline_parent_registered) {
+-		err = device_register(&vm_cmdline_parent);
+-		if (err) {
+-			put_device(&vm_cmdline_parent);
+-			pr_err("Failed to register parent device!\n");
+-			return err;
+-		}
+-		vm_cmdline_parent_registered = 1;
+-	}
+-
+-	pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
+-		       vm_cmdline_id,
+-		       (unsigned long long)resources[0].start,
+-		       (unsigned long long)resources[0].end,
+-		       (int)resources[1].start);
+-
+-	pdev = platform_device_register_resndata(&vm_cmdline_parent,
+-			"virtio-mmio", vm_cmdline_id++,
+-			resources, ARRAY_SIZE(resources), NULL, 0);
+-
+-	return PTR_ERR_OR_ZERO(pdev);
+	return vm_add_device(resources, ARRAY_SIZE(resources));
+ }
+ 
+ static int vm_cmdline_get_device(struct device *dev, void *data)
+@@ -773,6 +779,94 @@ static void vm_unregister_cmdline_devices(void)
+ 	}
+ }
+ 
+#ifdef CONFIG_DRAGONBALL_DEVICE_MANAGER
+static int vm_match_device(struct device *dev, const void *data)
+{
+	struct resource *resource = (struct resource *)data;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	if ((pdev->resource[0].start == resource[0].start)  &&
+		(pdev->resource[0].end == resource[0].end) &&
+		(pdev->resource[1].start == resource[1].start))
+		return 1;
+	return 0;
+}
+
+static struct device *vm_find_device(struct resource *res)
+{
+	return device_find_child(&vm_cmdline_parent, res, vm_match_device);
+}
+
+static int vm_device_overlap(struct device *dev, const void *data)
+{
+	struct resource *res = (struct resource *)data;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	/* Detect IRQ number conflicts except shared IRQs. */
+	if (!(res[1].flags & IORESOURCE_IRQ_SHAREABLE) &&
+		(pdev->resource[1].start == res[1].start)) {
+		return 1;
+	}
+
+	/* Detect MMIO overlap; resource ends are inclusive, hence <= and >=. */
+	if ((pdev->resource[0].start <= res[0].end) &&
+		(pdev->resource[0].end >= res[0].start)) {
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct device *vm_detect_resource(struct resource *res)
+{
+	/* return NULL if no resource overlapped */
+	return device_find_child(&vm_cmdline_parent, res, vm_device_overlap);
+}
+
+int virtio_mmio_add_device(struct resource *resources, size_t res_size)
+{
+	int err;
+	struct device *dev;
+
+	if (res_size < 2 || !resources)
+		return -EINVAL;
+
+	dev = vm_detect_resource(resources);
+	if (dev) {
+		put_device(dev);
+		return -EEXIST;
+	}
+
+	lock_device_hotplug();
+	err = vm_add_device(resources, res_size);
+	unlock_device_hotplug();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(virtio_mmio_add_device);
+
+int virtio_mmio_del_device(struct resource *resources, size_t res_size)
+{
+	int ret;
+	struct device *dev;
+
+	if (res_size < 2 || !resources)
+		return -EINVAL;
+
+	dev = vm_find_device(resources);
+	if (!dev)
+		return -ENODEV;
+
+	lock_device_hotplug();
+	ret = vm_unregister_cmdline_device(dev, NULL);
+	unlock_device_hotplug();
+	/* Drop the vm_find_device() reference only after the last use. */
+	put_device(dev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_mmio_del_device);
+#endif	/* CONFIG_DRAGONBALL_DEVICE_MANAGER */
+
+ #else
+ 
+ static void vm_unregister_cmdline_devices(void)
+diff --git a/include/dragonball/device_manager.h b/include/dragonball/device_manager.h
+index a1713e9f026d1..785761c47f973 100644
+--- a/include/dragonball/device_manager.h
+++ b/include/dragonball/device_manager.h
+@@ -15,4 +15,9 @@
+ 
+ #include <linux/device.h>
+ 
+#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)
+int virtio_mmio_add_device(struct resource *resources, size_t res_size);
+int virtio_mmio_del_device(struct resource *resources, size_t res_size);
+#endif
+
+ #endif /* _DB_DEVICE_MANAGER_H */
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0005-upcall-dragonball-devmgr-supports-cpu-hotplug-on-arm.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0005-upcall-dragonball-devmgr-supports-cpu-hotplug-on-arm.patch
@@ -0,0 +1,162 @@
+From 9fecfb42b7da22604467053e88418c72745e0560 Mon Sep 17 00:00:00 2001
+From: xuejun-xj <jiyunxue@linux.alibaba.com>
+Date: Wed, 10 May 2023 13:55:43 +0800
+Subject: [PATCH 5/8] upcall: dragonball-devmgr supports cpu hotplug on arm64
+
+Enable vcpuhotplug feature on aarch64 in guest kernel. It communicates
+with dragonball by using upcall. This commit does these changes:
+
+1. Wraps x86 related fields with CONFIG_X86_64.
+2. Add "cpu_event_notification" for arm64.
+3. Add "add_cpu_dev" and "del_cpu_dev" for arm64.
+
+Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
+Reviewed-by: Chao Wu <chaowu@linux.alibaba.com>
+Reviewed-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+---
+ .../upcall_srv/dragonball_device_manager.c    | 84 ++++++++++++++++++-
+ 1 file changed, 81 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+index f591841715c3b..e2d1b7d56eafe 100644
+--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+@@ -89,15 +89,21 @@ struct devmgr_req {
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ 		struct {
+ 			uint8_t count;
+#ifdef CONFIG_X86_64
+ 			uint8_t apic_ver;
+ 			uint8_t apic_ids[256];
+#endif
+ 		} cpu_dev_info;
+ #endif
+ 	} msg_load;
+ };
+ 
+ struct cpu_dev_reply_info {
+#if defined(CONFIG_X86_64)
+ 	uint32_t apic_index;
+#elif defined(CONFIG_ARM64)
+	uint32_t cpu_id;
+#endif
+ };
+ 
+ struct devmgr_reply {
+@@ -194,7 +200,8 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
+ 	msg->msg_flags     = msg_flags;
+ }
+ 
+-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+#if defined(CONFIG_X86_64)
+ static int get_cpu_id(int apic_id)
+ {
+ 	int i;
+@@ -235,6 +242,24 @@ static void cpu_event_notification(
+ 	_fill_msg_header(&rep->msg_header,
+ 	sizeof(struct cpu_dev_reply_info), action_type, 0);
+ }
+#elif defined(CONFIG_ARM64)
+/*
+ * Fill @rep with the first failed hotplug index (cpu_id) for dragonball.
+ * If hotplug/hotunplug succeeds, it is equal to the expected cpu count.
+ */
+static void cpu_event_notification(
+	uint8_t cpu_id,
+	int ret,
+	uint32_t action_type,
+	struct devmgr_reply *rep)
+{
+	pr_info("cpu event notification: cpu_id %d\n", cpu_id);
+	rep->msg_load.cpu_dev_info.cpu_id = cpu_id;
+	rep->ret = ret;
+	_fill_msg_header(&rep->msg_header,
+	sizeof(struct cpu_dev_reply_info), action_type, 0);
+}
+#endif
+ #endif
+ 
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+@@ -278,7 +303,8 @@ static int del_mmio_dev(struct devmgr_req *req,
+ #endif
+ 
+ 
+-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+#if defined(CONFIG_X86_64)
+ static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
+ {
+ 	int cpu_id, node_id;
+@@ -447,6 +473,58 @@ static int del_cpu_dev(struct devmgr_req *req,
+ 		cpu_event_notification(i, ret, DEL_CPU, rep);
+ 	return ret;
+ }
+#elif defined(CONFIG_ARM64)
+static int add_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep)
+{
+	int i, ret = 0;
+	unsigned int cpu_id, nr_online_cpus;
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+
+	nr_online_cpus = num_online_cpus();
+
+	pr_info("Current vcpu number: %d, Add vcpu number: %d\n",
+		nr_online_cpus, count);
+
+	for (i = 0; i < count; ++i) {
+		cpu_id = nr_online_cpus + i;	/* assumes online ids are 0..n-1 - TODO confirm */
+		ret = add_cpu(cpu_id);
+		if (ret != 0)
+			break;	/* stop at the first cpu that fails to come up */
+	}
+
+	cpu_event_notification(nr_online_cpus + i, ret, ADD_CPU, rep);
+	return ret;
+}
+
+static int del_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep)
+{
+	int i, ret = 0;
+	unsigned int cpu_id, nr_online_cpus;
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+
+	nr_online_cpus = num_online_cpus();
+
+	pr_info("Current vcpu number: %d, Delete vcpu number: %d\n",
+		nr_online_cpus, count);
+
+	if (count >= nr_online_cpus) {	/* at least one cpu must stay online */
+		pr_err("cpu del parameter check error: cannot remove all vcpus\n");
+		ret = -EINVAL;
+		cpu_event_notification(0, ret, DEL_CPU, rep);
+		return ret;
+	}
+
+	for (i = 0; i < count; ++i) {
+		cpu_id = nr_online_cpus - i - 1;	/* highest id first; assumes ids are 0..n-1 */
+		ret = remove_cpu(cpu_id);
+		if (ret != 0)
+			break;	/* stop at the first cpu that fails to go down */
+	}
+
+	cpu_event_notification(nr_online_cpus - i, ret, DEL_CPU, rep);
+	return ret;
+}
+#endif
+ #endif
+ 
+ static struct {
+@@ -457,7 +535,7 @@ static struct {
+ 	{ADD_MMIO, add_mmio_dev},
+ 	{DEL_MMIO, del_mmio_dev},
+ #endif
+-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ 	{ADD_CPU, add_cpu_dev},
+ 	{DEL_CPU, del_cpu_dev},
+ #endif
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0006-msi-control-msi-irq-number-activated.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0006-msi-control-msi-irq-number-activated.patch
@@ -0,0 +1,64 @@
+From 8f10003034a64ea5a562f912fee5037637084a9b Mon Sep 17 00:00:00 2001
+From: xuejun-xj <jiyunxue@linux.alibaba.com>
+Date: Wed, 10 May 2023 14:51:40 +0800
+Subject: [PATCH 6/8] msi: control msi irq number activated
+
+When passing through a PCI device, the kernel initializes and activates
+(max_cpu_count+1) MSI irqs. However, in a vcpu hotplug situation,
+because of vgic, max_cpu_count may be greater than online_cpu_count.
+The irqs of those offline cpus are also activated by the kernel, which
+causes passing through the PCI device to fail.
+
+To solve this problem, this patch adds a function
+"check_affinity_mask_online" to check whether msi_desc->affinity contains
+online cpus. If it contains none, the for loop continues and skips
+activating the related irq.
+
+Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
+Reviewed-by: Shuo Tan <shuo.tan@linux.alibaba.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+---
+ kernel/irq/msi.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
+index e7ad992548416..1ecfbad4d2c8c 100644
+--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
+@@ -1289,6 +1289,23 @@ static int populate_alloc_info(struct irq_domain *domain, struct device *dev,
+ 	return 0;
+ }
+ 
+/*
+ * check_affinity_mask_online - test whether an affinity mask can be served
+ * @affinity: affinity descriptor of the MSI irq about to be activated
+ *
+ * When devices such as NVMe are passed through, the kernel allocates
+ * maxcpus+1 MSI irqs and then activates them. With vcpu hotplug, where
+ * bootcpus < maxcpus, some of those irqs may be bound only to cpus that
+ * are still offline, and activating them fails. The caller uses this
+ * check to skip such irqs.
+ *
+ * Return: true if at least one cpu in the mask is online, false otherwise.
+ */
+static inline bool check_affinity_mask_online(struct irq_affinity_desc *affinity)
+{
+	return cpumask_intersects(&affinity->mask, cpu_online_mask);
+}
+
+ static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
+ 				   struct msi_ctrl *ctrl)
+ {
+@@ -1340,6 +1357,9 @@ static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain
+ 			return msi_handle_pci_fail(domain, desc, allocated);
+ 
+ 		for (i = 0; i < desc->nvec_used; i++) {
+			if (desc->affinity
+				&& !check_affinity_mask_online(desc->affinity))
+				continue;
+ 			irq_set_msi_desc_off(virq, i, desc);
+ 			irq_debugfs_copy_devname(virq + i, dev);
+ 			ret = msi_init_virq(domain, virq + i, vflags);
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0007-smp-update-bringup_nonboot_cpus-parameters.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0007-smp-update-bringup_nonboot_cpus-parameters.patch
@@ -0,0 +1,139 @@
+From e82de56fe9659355b208a283d56d5924875e5290 Mon Sep 17 00:00:00 2001
+From: xuejun-xj <jiyunxue@linux.alibaba.com>
+Date: Tue, 23 May 2023 09:43:02 +0800
+Subject: [PATCH 7/8] smp: update bringup_nonboot_cpus parameters
+
+On aarch64, kvm doesn't allow vmm to call KVM_CREATE_VCPU ioctls after
+vm has already started, which is caused by vgic_initialized check in
+kvm_arch_vcpu_precreate() function. Therefore, to support vcpu hotplug
+feature on aarch64, all the vcpus should be created and configured ready
+for start at booting procedure.
+
+To solve the problem, dragonball will add a property in each cpu node,
+called "boot-onlined". This property indicates whether this cpu should
+be onlined at first boot. It has two values: 0 and 1. 0 means offline,
+while 1 means online.
+
+This commit also adds a helper function called "of_get_cpu_boot_onlined",
+which parses the cpu node and gets the value of the boot-onlined property,
+then updates the global variable "boot_onlined_cpu".
+
+When the kernel calls smp_init(), bringup_nonboot_cpus starts all the
+other cpus except cpu0. The activated cpu number equals setup_max_cpus.
+In the vcpu hotplug scenario, the vmm creates all the vcpufds before the
+vm is initialized, while activating only a few vcpus at first boot. The
+setup_max_cpus variable is initialized to the total vcpu count. This
+means that the other cpus cannot find enough cpu threads, and there is
+a wait of 5 seconds for each such cpu.
+
+Therefore, we use boot_onlined_cpu instead of setup_max_cpus to give
+"bringup_nonboot_cpus" correct cpu number it needs.
+
+Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
+---
+ .../devicetree/bindings/arm/cpus.yaml         | 12 +++++++++
+ arch/arm64/kernel/smp.c                       | 25 +++++++++++++++++++
+ kernel/smp.c                                  | 11 +++++++-
+ 3 files changed, 47 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
+index 736b7ab1bd0a0..fb26561f8e82f 100644
+--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
+@@ -425,6 +425,18 @@ properties:
+       formed by encoding the target CPU id into the low bits of the
+       physical start address it should jump to.
+ 
+  boot-onlined:
+    $ref: '/schemas/types.yaml#/definitions/uint32'
+    description: |
+      The boot-onlined property is an optional u32 value that indicates
+      whether the cpu device should be activated at first boot. This is
+      useful in vcpu hotplug scenario to pass correct value of activated
+      cpu number.
+
+      This property has two values: 0 and 1. 1 means the cpu should be
+      activated while 0 means it shouldn't.
+
+
+   thermal-idle:
+     type: object
+ 
+diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
+index 68cea3a4a35ca..7938569e400c6 100644
+--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
+@@ -673,6 +673,28 @@ static void __init acpi_parse_and_init_cpus(void)
+ #define acpi_parse_and_init_cpus(...)	do { } while (0)
+ #endif
+ 
+
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+extern unsigned int boot_onlined_cpu;
+static void __init of_get_cpu_boot_onlined(struct device_node *dn)
+{
+	unsigned int boot_onlined;
+	int r;
+
+	r = of_property_read_u32(dn, "boot-onlined", &boot_onlined);
+	if (r) {
+		pr_err("%pOF: missing boot-onlined property\n", dn);
+		return;
+	}
+	/*
+	 * Property boot-onlined has two values: 0 (offline) and 1 (online).
+	 * Count each cpu node at most once, so that a malformed value greater
+	 * than 1 cannot inflate the number of cpus brought up at boot.
+	 */
+	boot_onlined_cpu += !!boot_onlined;
+}
+#endif
+
+ /*
+  * Enumerate the possible CPU set from the device tree and build the
+  * cpu logical map array containing MPIDR values related to logical
+@@ -683,6 +705,9 @@ static void __init of_parse_and_init_cpus(void)
+ 	struct device_node *dn;
+ 
+ 	for_each_of_cpu_node(dn) {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+		of_get_cpu_boot_onlined(dn);
+#endif
+ 		u64 hwid = of_get_cpu_hwid(dn, 0);
+ 
+ 		if (hwid & ~MPIDR_HWID_BITMASK)
+diff --git a/kernel/smp.c b/kernel/smp.c
+index 02f52291fae42..95dce332c18f1 100644
+--- a/kernel/smp.c
+++ b/kernel/smp.c
+@@ -988,17 +988,26 @@ void __init setup_nr_cpu_ids(void)
+ 	set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
+ }
+ 
+/* Setup number of CPUs to activate */
+unsigned int boot_onlined_cpu = 0;
+
+ /* Called by boot processor to activate the rest. */
+ void __init smp_init(void)
+ {
+ 	int num_nodes, num_cpus;
+ 
+	int num_onlined_cpu = setup_max_cpus;
+
+ 	idle_threads_init();
+ 	cpuhp_threads_init();
+ 
+ 	pr_info("Bringing up secondary CPUs ...\n");
+ 
+-	bringup_nonboot_cpus(setup_max_cpus);
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+	if (boot_onlined_cpu != 0)
+		num_onlined_cpu = boot_onlined_cpu;
+#endif
+	bringup_nonboot_cpus(num_onlined_cpu);
+ 
+ 	num_nodes = num_online_nodes();
+ 	num_cpus  = num_online_cpus();
+-- 
+2.34.1
+
--- a/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch
+++ b/tools/packaging/kernel/patches/6.18.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch
@@ -0,0 +1,173 @@
+From 3ffce0dd35013a3e1f15b2595b21875546a5f4c8 Mon Sep 17 00:00:00 2001
+From: Chao Wu <chaowu@linux.alibaba.com>
+Date: Wed, 27 Dec 2023 14:43:47 +0800
+Subject: [PATCH 8/8] upcall: add pci hotplug / hot-unplug support
+
+add two new upcall functions add_pci_dev and del_pci_dev, mainly for hotplugging
+and hot-unplugging pci device in the guest kernel through the upcall server.
+
+Users could implement upcall client side with add_pci or del_pci command and trigger
+those commands in the hypervisor side.
+
+As always, Dragonball hypervisor will implement the client side to do pci hotplug and
+hot-unplug as an example
+
+Signed-off-by: Gerry Liu <gerry@linux.alibaba.com>
+Signed-off-by: Helin Guo <helinguo@linux.alibaba.com>
+Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
+---
+ drivers/misc/dragonball/upcall_srv/Kconfig    | 11 +++
+ .../upcall_srv/dragonball_device_manager.c    | 90 +++++++++++++++++++
+ 2 files changed, 101 insertions(+)
+
+diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
+index fc83f03c2edd2..735928316eda2 100644
+--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
+@@ -47,3 +47,14 @@ config DRAGONBALL_HOTPLUG_CPU
+ 	  structure with command and parameter to hot-pluging an vCPU.
+ 
+ 	  If unsure, say N.
+
+config DRAGONBALL_HOTPLUG_PCI
+	bool "PCI hotplug/hotunplug support"
+	depends on DRAGONBALL_DEVICE_MANAGER
+	default y
+	help
+	  This option implements PCI hotplug/hotunplug support. The vmm
+	  should send hotplug requests over vsock, following a special data
+	  structure with command and parameter, to hot-plug a PCI device.
+
+	  If unsure, say N.
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+index e2d1b7d56eafe..6f81b8cca19df 100644
+--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+@@ -22,6 +22,7 @@
+ #include <linux/cpu.h>
+ #include <linux/cpumask.h>
+ #include <linux/cpuhotplug.h>
+#include <linux/pci.h>
+ #include <asm/cpu.h>
+ #ifdef CONFIG_X86_64
+ #include <asm/mpspec.h>
+@@ -94,6 +95,12 @@ struct devmgr_req {
+ 			uint8_t apic_ids[256];
+ #endif
+ 		} cpu_dev_info;
+#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+		struct {
+			uint8_t busno;
+			uint8_t devfn;
+		} pci_dev_info;
+ #endif
+ 	} msg_load;
+ };
+@@ -121,6 +128,9 @@ struct devmgr_reply {
+ #endif
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ 	struct cpu_dev_reply_info cpu_dev_info;
+#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+	struct {} pci_dev_info;
+ #endif
+ 	} msg_load;
+ };
+@@ -302,6 +312,82 @@ static int del_mmio_dev(struct devmgr_req *req,
+ }
+ #endif
+ 
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+static int add_pci_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret = 0;
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+	uint8_t busno = req->msg_load.pci_dev_info.busno;
+	uint8_t devfn = req->msg_load.pci_dev_info.devfn;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+
+	pr_info("add pci device of busno: %02x, devfn: %02x\n", busno, devfn);
+
+	pci_lock_rescan_remove();
+
+	/* Similar to pci_rescan_bus(), but only the requested slot is scanned */
+
+	bus = pci_find_bus(0, busno);	/* PCI domain 0 only */
+	if (!bus) {
+		pr_err("Could not find PCI bus for busno %02x\n", busno);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pci_scan_slot(bus, devfn);
+	dev = pci_get_slot(bus, devfn);	/* confirms the scan found the device */
+	if (!dev) {
+		pr_err("Could not find PCI device for slot %02x\n", devfn);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pci_bus_claim_resources(bus);
+
+	pci_bus_add_devices(bus);
+
+	pci_dev_put(dev);	/* balance pci_get_slot() */
+
+out:
+	pci_unlock_rescan_remove();
+	if (!ret)	/* the reply header is filled on success only */
+		_fill_msg_header(rep_mh, 0, ADD_PCI, 0);
+	return ret;
+}
+
+static int del_pci_dev(struct devmgr_req *req,
+			struct devmgr_reply *rep)
+{
+	int ret = 0;
+	struct devmgr_msg_header *rep_mh = &rep->msg_header;
+	uint8_t busno = req->msg_load.pci_dev_info.busno;
+	uint8_t devfn = req->msg_load.pci_dev_info.devfn;
+	struct pci_dev *dev;
+
+	pr_info("remove pci device of busno: %02x, devfn: %02x\n", busno, devfn);
+
+	pci_lock_rescan_remove();
+
+	dev = pci_get_domain_bus_and_slot(0, busno, devfn);	/* PCI domain 0 only */
+
+	if (!dev) {
+		pr_err("Could not find PCI device for slot %02x\n", devfn);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pci_stop_and_remove_bus_device(dev);
+
+	pci_dev_put(dev);	/* balance pci_get_domain_bus_and_slot() */
+out:
+	pci_unlock_rescan_remove();
+	if (!ret)	/* the reply header is filled on success only */
+		_fill_msg_header(rep_mh, 0, DEL_PCI, 0);
+	return ret;
+}
+#endif
+ 
+ #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ #if defined(CONFIG_X86_64)
+@@ -539,6 +625,10 @@ static struct {
+ 	{ADD_CPU, add_cpu_dev},
+ 	{DEL_CPU, del_cpu_dev},
+ #endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+	{ADD_PCI, add_pci_dev},
+	{DEL_PCI, del_pci_dev},
+#endif
+ };
+ 
+ static action_route_t get_action(struct devmgr_req *req)
+-- 
+2.34.1
+
--- a/tools/testing/gatekeeper/jobs.py
+++ b/tools/testing/gatekeeper/jobs.py
@@ -38,6 +38,11 @@ if os.environ.get("GITHUB_TOKEN"):
    _GH_HEADERS["Authorization"] = f"Bearer {os.environ['GITHUB_TOKEN']}"
 _GH_API_URL = f"https://api.github.com/repos/{os.environ['GITHUB_REPOSITORY']}"
 _GH_RUNS_URL = f"{_GH_API_URL}/actions/runs"
+_GH_SUMMARY_URL = (
+    f"{os.environ.get('GITHUB_SERVER_URL')}/"
+    f"{os.environ.get('GITHUB_REPOSITORY')}/actions/runs/"
+    f"{os.environ.get('GITHUB_RUN_ID')}"
+)
 if os.environ.get("DEBUG", "false") == "true":
    DEBUG_DIR = os.path.join(os.path.abspath('.'), str(int(time.time())))
    os.makedirs(DEBUG_DIR)
@@ -135,12 +140,13 @@ class Checker:
        warn = []
        for name, job in self.results.items():
            status = self._job_status(job)
+            url = job.get("html_url", "")
            if status == RUNNING:
-                warn.append(f"WARN: {name} - Still running")
+                warn.append(f"WARN: {name} - Still running {url}")
            elif status == PASS:
-                good.append(f"PASS: {name} - success")
+                good.append(f"PASS: {name} - success {url}")
            else:
-                bad.append(f"FAIL: {name} - Not passed - {status}")
+                bad.append(f"FAIL: {name} - Not passed - {status} {url}")
        out = '\n'.join(sorted(good) + sorted(warn) + sorted(bad))
        stat = self.status()
        if stat == RUNNING:
@@ -154,6 +160,51 @@ class Checker:
            status = "Not all required jobs passed!"
        return f"{out}\n\n{status}"

+    def write_step_summary(self):
+        """Write FAIL/WARN/PASS results to GitHub Step Summary if available"""
+        def _section(name, items, icon='*'):
+            """Return MD lines for one collapsible section ("None" if empty)"""
+            lines = []
+            lines.append(f"<details open>\n<summary><h2>{name}</h2></summary>\n")
+            if not items:
+                lines.append("None")
+            else:
+                for item in items:
+                    lines.append(f"{icon} {item}")
+            lines.append("</details>\n")
+            return lines
+
+        summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+        if not summary_path:
+            return  # no step summary file to write to
+
+        lines = []
+        passing = []
+        failing = []
+        running = []
+
+        for name, job in self.results.items():
+            status = self._job_status(job)
+            url = job.get("html_url", "")
+            if status == RUNNING:
+                running.append(f"[{name}]({url})" if url else name)
+            elif status == PASS:
+                passing.append(f"[{name}]({url})" if url else name)
+            else:
+                link = f"[{name}]({url})" if url else name
+                failing.append(f"{link} ({status})")
+        lines.extend(_section("Failing checks", failing, "❌"))
+        lines.extend(_section("In progress checks", running, "🔶"))
+        lines.extend(_section("Successful checks", passing, "🟢"))
+        summary = [f"Total: {len(self.results)}, "
+                   f"Passed: {len(passing)}, "
+                   f"Failed: {len(failing)}, Running: {len(running)}"]
+        lines.extend(_section("Summary", summary))
+
+        with open(summary_path, "w", encoding="utf8") as summary:
+            summary.write("\n".join(lines) + "\n")
+        print(f"Human-readable summary: {_GH_SUMMARY_URL}")
+
    def fetch_json_from_url(self, url, task, params=None):
        """Fetches URL and reports json output"""
        print(url, file=sys.stderr)
@@ -220,6 +271,7 @@ class Checker:
            for job in jobs:
                self.record(run["name"], job)
        print(self)
+        self.write_step_summary()
        return self.status()

    def wait_for_required_tests(self):
--- a/tools/testing/gatekeeper/required-tests.yaml
+++ b/tools/testing/gatekeeper/required-tests.yaml
@@ -102,7 +102,7 @@ mapping:
      - Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, rke2)
      - Kata Containers CI / kata-containers-ci-on-push / run-kata-monitor-tests / run-monitor (qemu, crio)
      - Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-nvidia-gpu / run-nvidia-gpu-tests-on-amd64
-      - Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-nvidia-gpu / run-nvidia-gpu-snp-tests-on-amd64
+      # - Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-nvidia-gpu / run-nvidia-gpu-snp-tests-on-amd64
    required-labels:
      - ok-to-test
  build:
--- a/utils.mk
+++ b/utils.mk
@@ -181,16 +181,9 @@ CWD := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 standard_rust_check:
 	@echo "standard rust check..."
 	cargo fmt -- --check
-	cargo clippy --all-targets --all-features --release \
+	cargo clippy --all-targets --all-features --release --locked \
 		-- \
 		-D warnings
-	cargo check
-	@DIFF=$$(git diff HEAD); \
-	if [ -n "$$DIFF" ]; then \
-		echo "ERROR: cargo check resulted in uncommited changes"; \
-		echo "$$DIFF"; \
-		exit 1; \
-	fi

 # Install a file (full version).
 #
--- a/versions.yaml
+++ b/versions.yaml
@@ -226,7 +226,7 @@ assets:
  kernel-dragonball-experimental:
    description: "Linux kernel with Dragonball VMM optimizations like upcall"
    url: "https://cdn.kernel.org/pub/linux/kernel/v6.x/"
-    version: "v6.12.47"
+    version: "v6.18.15"

 externals:
  description: "Third-party projects used by the system"
@@ -388,7 +388,7 @@ externals:
  nydus-snapshotter:
    description: "Snapshotter for Nydus image acceleration service"
    url: "https://github.com/containerd/nydus-snapshotter"
-    version: "v0.15.10"
+    version: "v0.15.13"

  opa:
    description: "Open Policy Agent"
Author	SHA1	Message	Date
Steve Horsman	8c2b7ed619	Merge pull request #12729 from fidencio/topic/kata-deploy-nydus-dont-touch-data-dir-on-install kata-deploy: nydus: never remove the data dir	2026-03-25 10:28:50 +00:00
Steve Horsman	af7fdd5cd1	Merge pull request #12725 from kata-containers/sprt/cargo-check-fix build: Don't fail `cargo check` on a dirty tree	2026-03-25 10:21:16 +00:00
Steve Horsman	0d8186ae16	Merge pull request #12730 from fidencio/topic/bump-nydus-snapshotter versions: Bump nydus-snapshotter to v0.15.13	2026-03-25 10:20:23 +00:00
Steve Horsman	7e0f5e533a	Merge pull request #12733 from fidencio/topic/unrequire-nvidia-gpu-snp-tests-till-we-fix-auth-issues gatekeeper: Unrequire NVIDIA GPU SNP tests till auth is fixed	2026-03-25 10:11:10 +00:00
Fabiano Fidêncio	bcfb2354e0	gatekeeper: Unrequire NVIDIA GPU SNP tests till auth is fixed SSIA, the NIM tests are breaking due to authentication issues, and those issues are blocking other PRs. Let's unrequire the test for now, and mark it as required again once we have fixed the auth issues. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>	2026-03-25 10:23:53 +01:00
Fabiano Fidêncio	caf6b244e6	versions: Bump nydus-snapshotter to v0.15.13 As this brings in a fix for using images with too many layers. https://github.com/containerd/nydus-snapshotter/releases/tag/v0.15.13 Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>	2026-03-25 08:31:48 +01:00
Fabiano Fidêncio	fb5482f647	kata-deploy: nydus: never remove the data directory Removing /var/lib/nydus-snapshotter during install or uninstall creates a split-brain state: the nydus backend starts empty while containerd's BoltDB (meta.db) still holds snapshot records from the previous run. Any subsequent image pull then fails with: "unable to prepare extraction snapshot: target snapshot \"sha256:...\": already exists" An earlier attempt cleaned up containerd's BoltDB via `ctr snapshots rm` before wiping the directory, but that cleanup is inherently fragile: - It requires the nydus gRPC service to be reachable at cleanup time. If the service is stopped, crashed, or not yet running, every `ctr` call silently fails and the stale records remain. - Any workload still actively using a snapshot blocks the entire cleanup, making it impossible to guarantee a clean state. The correct invariant is that meta.db and the nydus backend always agree. Preserving the data directory unconditionally guarantees this: - Fresh install: data directory does not exist, nydus starts empty. - Reinstall: existing snapshots and nydus.db are preserved, meta.db and backend remain in sync, new binary starts cleanly. - After uninstall: containerd is reconfigured without the nydus proxy_plugins entry and restarted, so the snapshot records in meta.db are completely dormant — nothing will use them. If nydus is reinstalled later, the data directory is still present and both sides remain in sync, so no split-brain can occur. Any stale snapshots from previous workloads are garbage-collected by containerd once the images referencing them are removed. This also removes the cleanup_containerd_nydus_snapshots, cleanup_nydus_snapshots, and cleanup_nydus_containers helpers that were introduced by the earlier (fragile) attempt. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com> Made-with: Cursor	2026-03-25 07:06:41 +01:00
Alex Lyn	46aa318b74	Merge pull request #12716 from lifupan/bump_dragonball_kernel kernel: Bump the kernel to v6.18.15 for dragonball	2026-03-25 11:04:44 +08:00
Aurélien Bombo	ec9c57c595	Merge pull request #12467 from ldoktor/gk-output tools.gatekeeper: Improve output	2026-03-24 17:03:55 -05:00
Fabiano Fidêncio	8950f1caeb	Merge pull request #12706 from fidencio/topic/ci-tdx-nydus-snapshotter tests: Use the helm chart to setup nydus for TDX	2026-03-24 22:37:38 +01:00
Fabiano Fidêncio	814ae53d77	tests: Use the helm chart to setup nydus for TDX Now that containerd 2.3.0-beta.0 has been released, it brings fixes for multi-snapshotters that allows us to test the baremetal machines in the same way we test the non-baremetal ones. Let's start doing the switch for TDX as timezone is friendlier with Mikko. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>	2026-03-24 19:13:59 +01:00
Fabiano Fidêncio	27dfb0d06f	Merge pull request #12724 from fidencio/topic/kata-deploy-properly-cleanup-nydus-snapshotter-on-uninstall kata-deploy: nydus: clean containerd metadata before cleaning up the backend	2026-03-24 19:13:25 +01:00
Aurélien Bombo	7ae2282a99	build: Don't fail `cargo check` on a dirty tree `cargo check` was introduced in `3f1533a` to check that Cargo.lock is in sync with Cargo.toml. However, if there are uncommitted changes in the working tree, the current invocation will immediately fail because of the `git diff` call, which is frustrating for local development. As it turns out, `cargo clippy` is a superset of `cargo check`, so we can simply pass `--locked` to `cargo clippy` to detect Cargo.lock issues. This is tested with the following change: diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 96b6c676d..e1963af00 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -4305,6 +4305,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" name = "test-utils" version = "0.1.0" dependencies = [ - "libc", "nix 0.26.4", ] which results in the following output: $ make -C src/agent check make: Entering directory '/kata-containers/src/agent' standard rust check... cargo fmt -- --check cargo clippy --all-targets --all-features --release --locked \ -- \ -D warnings error: the lock file /kata-containers/src/agent/Cargo.lock needs to be updated but --locked was passed to prevent this If you want to try to generate the lock file without accessing the network, remove the --locked flag and use --offline instead. make: *** [../../utils.mk:184: standard_rust_check] Error 101 make: Leaving directory '/kata-containers/src/agent' Signed-off-by: Aurélien Bombo <abombo@microsoft.com>	2026-03-24 11:22:14 -05:00
Fabiano Fidêncio	fd583d833b	kata-deploy: nydus: clean containerd metadata before wiping backend When /var/lib/nydus-snapshotter is removed, containerd's BoltDB (meta.db at /var/lib/containerd/) still holds snapshot records for the nydus snapshotter. On the next install these stale records cause image pulls to fail with: "unable to prepare extraction snapshot: target snapshot \"sha256:...\": already exists" The failure path in core/unpack/unpacker.go: 1. sn.Prepare() → metadata layer finds the target chainID in BoltDB → returns AlreadyExists without touching the nydus backend. 2. sn.Stat() → metadata layer finds the BoltDB record, then calls s.Snapshotter.Stat(bkey) on the nydus gRPC backend → NotFound (backend was wiped). 3. The unpacker treats NotFound as a transient key-collision race and retries 3 times; all 3 attempts hit the same dead end, and the pull is aborted. The commit message of `62ad0814c` ("nydus: Always start from a clean state") assumed "containerd will re-pull/re-unpack when it finds non- existent snapshots", but that is not what happens: the metadata layer intercepts the Prepare call in BoltDB before the backend is ever consulted. Fix: call cleanup_containerd_nydus_snapshots() before stopping the nydus service (and thus before wiping its data directory) in both install_nydus_snapshotter and uninstall_nydus_snapshotter. The cleanup must run while the service is still up because ctr snapshots rm goes through the metadata layer which calls the nydus gRPC backend to physically remove the snapshot; if the service is already stopped the backend call fails and the BoltDB record remains. The cleanup: - Discovers all containerd namespaces via `ctr namespaces ls -q` (falls back to k8s.io if that fails). - Removes containers whose Snapshotter field matches the nydus plugin name; these become dangling references once snapshots are gone and can confuse container reconciliation after an aborted CI run. 
- Removes snapshots round by round (leaf-first) until either the list is empty or no progress can be made (see below). Note: containerd's GC cannot substitute for this explicit cleanup. The image record (a GC root) references content blobs which reference the snapshots via gc.ref labels, keeping the entire chain alive in the GC graph even after the nydus backend is wiped. Snapshot removal rounds ----------------------- Snapshot chains are linear: an image with N layers produces a chain of N snapshots, each parented on the previous. Only the current leaf can be removed each round, so N layers require exactly N rounds. There is no fixed round cap — the loop terminates when either the list reaches zero (success) or a round removes nothing at all (all remaining snapshots are actively in use by running workloads). Active workload safety ---------------------- If active workloads still hold nydus snapshots (e.g. during a live upgrade), no progress is made in a round and cleanup_nydus_snapshots returns false. Both install_nydus_snapshotter and uninstall_nydus_snapshotter gate the fs::remove_dir_all on that return value: - true → proceed as before: stop service, wipe data dir. - false → stop service, skip data dir removal, log a warning. The new nydus instance starts on the existing backend state; running containers are left intact. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com> Made-with: Cursor	2026-03-24 16:44:25 +01:00
Fabiano Fidêncio	eb4ce0e98b	Merge pull request #12676 from manuelh-dev/mahuber/gpu-ci-data-storage tests: gpu: use container data storage feature	2026-03-24 09:59:13 +01:00
Fupan Li	6a832dd1f3	kernel: Bump the kernel to v6.18.15 for dragonball Bump the dragonball supported kernel to v6.18.15. Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>	2026-03-24 06:46:43 +08:00
Manuel Huber	79efe3e041	tests: gpu: use container data storage feature Use the container data storage feature for the k8s-nvidia-nim.bats test pod manifests. This reduces the pods' memory requirements. For this, enable the block-encrypted emptydir_mode for the NVIDIA GPU TEE handlers. Signed-off-by: Manuel Huber <manuelh@nvidia.com>	2026-03-23 11:43:11 -07:00
Steve Horsman	2728b493d5	Merge pull request #12681 from manuelh-dev/mahuber/ci-pip-py-venv tests: cc: setup function for python venv	2026-03-23 14:33:30 +00:00
Manuel Huber	5765bc97b4	tests: cc: setup function for python venv We recently had a failure on a new CI runner where ${HOME}/.cicd/venv/bin/activate was not present. The relevant call originated from ensure_sev_snp_measure. Thus, add a function ensure_cicd_python_venv before callers to pip install. Currently, the NVIDIA NIM test and the confidential attestation tests use pip to install dependencies. Signed-off-by: Manuel Huber <manuelh@nvidia.com>	2026-03-18 17:07:47 -07:00
Lukáš Doktor	ce65d17276	tools.gatekeeper: Add support for GITHUB_STEP_SUMMARY This should produce a table of failed/running jobs along with links to them. On pass it should only produce a simple line with how many jobs passed. Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>	2026-03-06 12:19:26 -03:00
Lukáš Doktor	27bebfb438	tools.gatekeeper: Print link to the results in status output. To simplify analyzing failures, let's print the link to the job result next to the status. Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>	2026-03-06 12:19:26 -03:00