From d4dd87a9743ca253393739f20ff6a66fec291569 Mon Sep 17 00:00:00 2001
From: Zvonko Kaiser <zkaiser@nvidia.com>
Date: Fri, 24 Jan 2025 02:55:45 +0000
Subject: [PATCH 1/3] gpu: Update config files

With the recent changes to cgroupsv1 and AGENT_INIT=no we
need to update the config files.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
---
 src/runtime/Makefile                          | 19 ++++++++++++++-----
 .../configuration-qemu-nvidia-gpu-snp.toml.in |  4 ++--
 .../configuration-qemu-nvidia-gpu-tdx.toml.in |  2 +-
 .../configuration-qemu-nvidia-gpu.toml.in     |  2 +-
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/runtime/Makefile b/src/runtime/Makefile
index 71f9320665..815ee31a3b 100644
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -439,17 +439,24 @@ ifneq (,$(QEMUCMD))
     KERNELNAME_CONFIDENTIAL_NV = $(call MAKE_KERNEL_CONFIDENTIAL_NAME_NV,$(KERNELCONFIDENTIALTYPE))
     KERNELPATH_CONFIDENTIAL_NV = $(KERNELDIR)/$(KERNELNAME_CONFIDENTIAL_NV)
 
-    DEFAULTVCPUS_NV = 16
-    DEFAULTMEMORY_NV = 65536
+    DEFAULTVCPUS_NV = 1
+    DEFAULTMEMORY_NV = 2048
     DEFAULTTIMEOUT_NV = 320
     DEFAULTVFIOPORT_NV = root-port
     DEFAULTPCIEROOTPORT_NV = 8
 
     KERNELPARAMS_NV =  "agent.hotplug_timeout=20"
-    KERNELPARAMS_NV += $(KERNELPARAMS)
+    KERNELPARAMS_NV += "cgroup_no_v1=all"
 
-    KERNELTDXPARAMS_NV = "authorize_allow_devs=pci:ALL"
-    KERNELTDXPARAMS_NV += $(KERNELTDXPARAMS)
+    KERNELTDXPARAMS_NV = $(KERNELPARAMS_NV)
+    KERNELTDXPARAMS_NV += "clearcpuid=mtrr"
+    KERNELTDXPARAMS_NV += "authorize_allow_devs=pci:ALL"
+
+    KERNELSNPPARAMS_NV = $(KERNELPARAMS_NV)
+
+    # Setting this to false can lead to cgroup leakages in the host
+    # Best practice for production is to set this to true
+    DEFSANDBOXCGROUPONLY_NV = true
 endif
 
 ifneq (,$(CLHCMD))
@@ -617,7 +624,9 @@ USER_VARS += DEFAULTVFIOPORT_NV
 USER_VARS += DEFAULTPCIEROOTPORT_NV
 USER_VARS += KERNELPARAMS_NV
 USER_VARS += KERNELTDXPARAMS_NV
+USER_VARS += KERNELSNPPARAMS_NV
 USER_VARS += DEFAULTTIMEOUT_NV
+USER_VARS += DEFSANDBOXCGROUPONLY_NV
 USER_VARS += DEFROOTFSTYPE
 USER_VARS += MACHINETYPE
 USER_VARS += KERNELDIR
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
index 07f2ce0728..3fc4810636 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
@@ -70,7 +70,7 @@ valid_hypervisor_paths = @QEMUSNPVALIDHYPERVISORPATHS@
 # may stop the virtual machine from booting.
 # To see the list of default parameters, enable hypervisor debug, create a
 # container and look for 'default-kernel-parameters' log entries.
-kernel_params = "@KERNELPARAMS@"
+kernel_params = "@KERNELSNPPARAMS_NV@"
 
 # Path to the firmware.
 # If you want that qemu uses the default firmware leave this option empty
@@ -617,7 +617,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
 
 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
index 5b6066b635..8f1586837f 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
@@ -613,7 +613,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
 
 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
index b141667e5a..c3eaf4a878 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
@@ -638,7 +638,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
 
 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful

From 66ccc25724cfe1dceb1273f347c67ad56496b9ee Mon Sep 17 00:00:00 2001
From: Zvonko Kaiser <zkaiser@nvidia.com>
Date: Thu, 6 Feb 2025 20:58:10 +0000
Subject: [PATCH 2/3] tdx: Update GPU config for the latest TDX stack

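We need extra kernel_params for TDX. The TDX configs also switch their
firmware setting to the OVMF build referenced by FIRMWAREPATH_NV.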

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
---
 src/runtime/Makefile                                         | 3 +++
 src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in | 2 +-
 src/runtime/config/configuration-qemu-tdx.toml.in            | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/runtime/Makefile b/src/runtime/Makefile
index 815ee31a3b..2f1721c7b8 100644
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -457,6 +457,8 @@ ifneq (,$(QEMUCMD))
     # Setting this to false can lead to cgroup leakages in the host
     # Best practice for production is to set this to true
     DEFSANDBOXCGROUPONLY_NV = true
+    # The latest OVMF build should be good for both TDX and SNP
+    FIRMWAREPATH_NV := $(PREFIXDEPS)/share/ovmf/OVMF.fd
 endif
 
 ifneq (,$(CLHCMD))
@@ -627,6 +629,7 @@ USER_VARS += KERNELTDXPARAMS_NV
 USER_VARS += KERNELSNPPARAMS_NV
 USER_VARS += DEFAULTTIMEOUT_NV
 USER_VARS += DEFSANDBOXCGROUPONLY_NV
+USER_VARS += FIRMWAREPATH_NV
 USER_VARS += DEFROOTFSTYPE
 USER_VARS += MACHINETYPE
 USER_VARS += KERNELDIR
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
index 8f1586837f..8207959063 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
@@ -70,7 +70,7 @@ kernel_params = "@KERNELTDXPARAMS_NV@"
 
 # Path to the firmware.
 # If you want that qemu uses the default firmware leave this option empty
-firmware = "@FIRMWARETDVFPATH@"
+firmware = "@FIRMWAREPATH_NV@"
 
 # Path to the firmware volume.
 # firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in
index de0dffca49..37ca97e1a2 100644
--- a/src/runtime/config/configuration-qemu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-tdx.toml.in
@@ -71,7 +71,7 @@ kernel_params = "@KERNELTDXPARAMS@"
 
 # Path to the firmware.
 # If you want that qemu uses the default firmware leave this option empty
-firmware = "@FIRMWARETDVFPATH@"
+firmware = "@FIRMWAREPATH_NV@"
 
 # Path to the firmware volume.
 # firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables

From 4bda16565bb970a8eab448258bece7fe261f8a09 Mon Sep 17 00:00:00 2001
From: Zvonko Kaiser <zkaiser@nvidia.com>
Date: Thu, 13 Feb 2025 16:51:20 +0000
Subject: [PATCH 3/3] gpu: Update timeouts

With the create_container_timeout the dial_timeout is less important.
Add the custom timeout for GPUs to create_container_timeout.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
---
 src/runtime/Makefile                                        | 2 +-
 .../config/configuration-qemu-nvidia-gpu-snp.toml.in        | 4 ++--
 .../config/configuration-qemu-nvidia-gpu-tdx.toml.in        | 6 +++---
 src/runtime/config/configuration-qemu-nvidia-gpu.toml.in    | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/runtime/Makefile b/src/runtime/Makefile
index 2f1721c7b8..192bab185c 100644
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -441,7 +441,7 @@ ifneq (,$(QEMUCMD))
 
     DEFAULTVCPUS_NV = 1
     DEFAULTMEMORY_NV = 2048
-    DEFAULTTIMEOUT_NV = 320
+    DEFAULTTIMEOUT_NV = 500
     DEFAULTVFIOPORT_NV = root-port
     DEFAULTPCIEROOTPORT_NV = 8
 
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
index 3fc4810636..223f7f9186 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
@@ -542,7 +542,7 @@ kernel_modules=[]
 
 # Agent connection dialing timeout value in seconds
 # (default: 90)
-dial_timeout = @DEFAULTTIMEOUT_NV@
+dial_timeout = 90
 
 [runtime]
 # If enabled, the runtime will log additional debug messages to the
@@ -676,7 +676,7 @@ experimental=@DEFAULTEXPFEATURES@
 # Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config 
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. 
 # In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
-create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFAULTTIMEOUT_NV@
 
 # Base directory of directly attachable network config.
 # Network devices for VM-based containers are allowed to be placed in the
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
index 8207959063..e61a1a1d15 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
@@ -537,8 +537,8 @@ kernel_modules=[]
 #debug_console_enabled = true
 
 # Agent connection dialing timeout value in seconds
-# (default: 60)
-dial_timeout = @DEFAULTTIMEOUT_NV@
+# (default: 90)
+dial_timeout = 90
 
 [runtime]
 # If enabled, the runtime will log additional debug messages to the
@@ -672,7 +672,7 @@ experimental=@DEFAULTEXPFEATURES@
 # Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config 
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. 
 # In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
-create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFAULTTIMEOUT_NV@
 
 # Base directory of directly attachable network config.
 # Network devices for VM-based containers are allowed to be placed in the
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
index c3eaf4a878..905e11c643 100644
--- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
@@ -563,7 +563,7 @@ kernel_modules=[]
 
 # Agent connection dialing timeout value in seconds
 # (default: 90)
-dial_timeout = @DEFAULTTIMEOUT_NV@
+dial_timeout = 90
 
 [runtime]
 # If enabled, the runtime will log additional debug messages to the
@@ -697,7 +697,7 @@ experimental=@DEFAULTEXPFEATURES@
 # Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config 
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout. 
 # In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
-create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFAULTTIMEOUT_NV@
 
 # Base directory of directly attachable network config.
 # Network devices for VM-based containers are allowed to be placed in the