From d4dd87a9743ca253393739f20ff6a66fec291569 Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Fri, 24 Jan 2025 02:55:45 +0000 Subject: [PATCH 1/3] gpu: Update config files With the recent changes to cgroupsv1 and AGENT_INIT=no we need to update the config files. Signed-off-by: Zvonko Kaiser --- src/runtime/Makefile | 19 ++++++++++++++----- .../configuration-qemu-nvidia-gpu-snp.toml.in | 4 ++-- .../configuration-qemu-nvidia-gpu-tdx.toml.in | 2 +- .../configuration-qemu-nvidia-gpu.toml.in | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 71f9320665..815ee31a3b 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -439,17 +439,24 @@ ifneq (,$(QEMUCMD)) KERNELNAME_CONFIDENTIAL_NV = $(call MAKE_KERNEL_CONFIDENTIAL_NAME_NV,$(KERNELCONFIDENTIALTYPE)) KERNELPATH_CONFIDENTIAL_NV = $(KERNELDIR)/$(KERNELNAME_CONFIDENTIAL_NV) - DEFAULTVCPUS_NV = 16 - DEFAULTMEMORY_NV = 65536 + DEFAULTVCPUS_NV = 1 + DEFAULTMEMORY_NV = 2048 DEFAULTTIMEOUT_NV = 320 DEFAULTVFIOPORT_NV = root-port DEFAULTPCIEROOTPORT_NV = 8 KERNELPARAMS_NV = "agent.hotplug_timeout=20" - KERNELPARAMS_NV += $(KERNELPARAMS) + KERNELPARAMS_NV += "cgroup_no_v1=all" - KERNELTDXPARAMS_NV = "authorize_allow_devs=pci:ALL" - KERNELTDXPARAMS_NV += $(KERNELTDXPARAMS) + KERNELTDXPARAMS_NV = $(KERNELPARAMS_NV) + KERNELTDXPARAMS_NV += "clearcpuid=mtrr" + KERNELTDXPARAMS_NV += "authorize_allow_devs=pci:ALL" + + KERNELSNPPARAMS_NV = $(KERNELPARAMS_NV) + + # Setting this to false can lead to cgroup leakages in the host + # Best practice for production is to set this to true + DEFSANDBOXCGROUPONLY_NV = true endif ifneq (,$(CLHCMD)) @@ -617,7 +624,9 @@ USER_VARS += DEFAULTVFIOPORT_NV USER_VARS += DEFAULTPCIEROOTPORT_NV USER_VARS += KERNELPARAMS_NV USER_VARS += KERNELTDXPARAMS_NV +USER_VARS += KERNELSNPPARAMS_NV USER_VARS += DEFAULTTIMEOUT_NV +USER_VARS += DEFSANDBOXCGROUPONLY_NV USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR 
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index 07f2ce0728..3fc4810636 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -70,7 +70,7 @@ valid_hypervisor_paths = @QEMUSNPVALIDHYPERVISORPATHS@ # may stop the virtual machine from booting. # To see the list of default parameters, enable hypervisor debug, create a # container and look for 'default-kernel-parameters' log entries. -kernel_params = "@KERNELPARAMS@" +kernel_params = "@KERNELSNPPARAMS_NV@" # Path to the firmware. # If you want that qemu uses the default firmware leave this option empty @@ -617,7 +617,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. # The sandbox cgroup is constrained if there is no container type annotation. # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType -sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@ # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 5b6066b635..8f1586837f 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -613,7 +613,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. # The sandbox cgroup is constrained if there is no container type annotation. 
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType -sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@ # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in index b141667e5a..c3eaf4a878 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -638,7 +638,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. # The sandbox cgroup is constrained if there is no container type annotation. # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType -sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@ # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. 
This is generally helpful From 66ccc25724cfe1dceb1273f347c67ad56496b9ee Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Thu, 6 Feb 2025 20:58:10 +0000 Subject: [PATCH 2/3] tdx: Update GPU config for the latest TDX stack We need extra kernel_params for TDX Signed-off-by: Zvonko Kaiser --- src/runtime/Makefile | 3 +++ src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in | 2 +- src/runtime/config/configuration-qemu-tdx.toml.in | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 815ee31a3b..2f1721c7b8 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -457,6 +457,8 @@ ifneq (,$(QEMUCMD)) # Setting this to false can lead to cgroup leakages in the host # Best practice for production is to set this to true DEFSANDBOXCGROUPONLY_NV = true + # The latest OVMF build should be good for both TDX and SNP + FIRMWAREPATH_NV := $(PREFIXDEPS)/share/ovmf/OVMF.fd endif ifneq (,$(CLHCMD)) @@ -627,6 +629,7 @@ USER_VARS += KERNELTDXPARAMS_NV USER_VARS += KERNELSNPPARAMS_NV USER_VARS += DEFAULTTIMEOUT_NV USER_VARS += DEFSANDBOXCGROUPONLY_NV +USER_VARS += FIRMWAREPATH_NV USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 8f1586837f..8207959063 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -70,7 +70,7 @@ kernel_params = "@KERNELTDXPARAMS_NV@" # Path to the firmware. # If you want that qemu uses the default firmware leave this option empty -firmware = "@FIRMWARETDVFPATH@" +firmware = "@FIRMWAREPATH_NV@" # Path to the firmware volume. 
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in index de0dffca49..37ca97e1a2 100644 --- a/src/runtime/config/configuration-qemu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -71,7 +71,7 @@ kernel_params = "@KERNELTDXPARAMS@" # Path to the firmware. # If you want that qemu uses the default firmware leave this option empty -firmware = "@FIRMWARETDVFPATH@" +firmware = "@FIRMWAREPATH_NV@" # Path to the firmware volume. # firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables From 4bda16565bb970a8eab448258bece7fe261f8a09 Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Thu, 13 Feb 2025 16:51:20 +0000 Subject: [PATCH 3/3] gpu: Update timeouts With the create_container_timeout the dial_timeout is less important. Add the custom timeout for GPUs in create_container_timeout Signed-off-by: Zvonko Kaiser --- src/runtime/Makefile | 2 +- .../config/configuration-qemu-nvidia-gpu-snp.toml.in | 4 ++-- .../config/configuration-qemu-nvidia-gpu-tdx.toml.in | 6 +++--- src/runtime/config/configuration-qemu-nvidia-gpu.toml.in | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 2f1721c7b8..192bab185c 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -441,7 +441,7 @@ ifneq (,$(QEMUCMD)) DEFAULTVCPUS_NV = 1 DEFAULTMEMORY_NV = 2048 - DEFAULTTIMEOUT_NV = 320 + DEFAULTTIMEOUT_NV = 500 DEFAULTVFIOPORT_NV = root-port DEFAULTPCIEROOTPORT_NV = 8 diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index 3fc4810636..223f7f9186 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -542,7 +542,7 @@ kernel_modules=[] # Agent connection dialing timeout value in 
seconds # (default: 90) -dial_timeout = @DEFAULTTIMEOUT_NV@ +dial_timeout = 90 [runtime] # If enabled, the runtime will log additional debug messages to the @@ -676,7 +676,7 @@ experimental=@DEFAULTEXPFEATURES@ # Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. # In essence, the timeout used for guest pull=runtime-request-timeout