diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 91d3eb976c..88ef8077ff 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -511,6 +511,8 @@ ifneq (,$(QEMUCMD)) DEFENABLEVCPUPINNING_NV = true + DEFENABLENUMA_NV = true + # NVIDIA profile: rootfs filesystem type (erofs for read-only, compressed images) DEFROOTFSTYPE_NV := $(ROOTFSTYPE_EROFS) @@ -689,6 +691,7 @@ USER_VARS += DEFAULTTIMEOUT_NV USER_VARS += DEFAULTLAUNCHPROCESSTIMEOUT_NV USER_VARS += DEFSANDBOXCGROUPONLY_NV USER_VARS += DEFENABLEVCPUPINNING_NV +USER_VARS += DEFENABLENUMA_NV USER_VARS += DEFROOTFSTYPE_NV USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index 4dae978b9b..b15186867d 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -360,7 +360,12 @@ enable_iommu_platform = false # as is: map VM NUMA nodes to host 1:1 and bind vCPUs to related CPUs. # Note: To take proper advantage of NUMA, static_sandbox_resource_mgmt should # also be enabled for memory pre-allocation. -enable_numa = false +# +# GPU workloads strongly benefit from NUMA awareness: when enabled, the runtime +# validates that each cold-plugged VFIO device (GPU) resides on a host NUMA +# node covered by the guest NUMA topology and logs a warning if it does not. +# Enabled by default for this profile via the DEFENABLENUMA_NV build variable. +enable_numa = @DEFENABLENUMA_NV@ # NUMA node mapping allows customizing how VM NUMA nodes map to host NUMA nodes. # Each entry defines a VM NUMA node and the host NUMA node(s) it maps to. 
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 1c1ce20b01..2928389b1c 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -337,7 +337,12 @@ enable_iommu_platform = false # as is: map VM NUMA nodes to host 1:1 and bind vCPUs to related CPUs. # Note: To take proper advantage of NUMA, static_sandbox_resource_mgmt should # also be enabled for memory pre-allocation. -enable_numa = false +# +# GPU workloads strongly benefit from NUMA awareness: when enabled, the runtime +# validates that each cold-plugged VFIO device (GPU) resides on a host NUMA +# node covered by the guest NUMA topology and logs a warning if it does not. +# Enabled by default for this profile via the DEFENABLENUMA_NV build variable. +enable_numa = @DEFENABLENUMA_NV@ # NUMA node mapping allows customizing how VM NUMA nodes map to host NUMA nodes. # Each entry defines a VM NUMA node and the host NUMA node(s) it maps to. diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in index 49f9db0d6e..f373082129 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -319,7 +319,12 @@ enable_iommu_platform = false # as is: map VM NUMA nodes to host 1:1 and bind vCPUs to related CPUs. # Note: To take proper advantage of NUMA, static_sandbox_resource_mgmt should # also be enabled for memory pre-allocation. -enable_numa = false +# +# GPU workloads strongly benefit from NUMA awareness: when enabled, the runtime +# validates that each cold-plugged VFIO device (GPU) resides on a host NUMA +# node covered by the guest NUMA topology and logs a warning if it does not. +# Enabled by default for this profile via the DEFENABLENUMA_NV build variable. 
+enable_numa = @DEFENABLENUMA_NV@ # NUMA node mapping allows customizing how VM NUMA nodes map to host NUMA nodes. # Each entry defines a VM NUMA node and the host NUMA node(s) it maps to. diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 811884a088..5a51f628ca 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -314,6 +314,11 @@ enable_iommu_platform = false # as is: map VM NUMA nodes to host 1:1 and bind vCPUs to related CPUs. # Note: To take proper advantage of NUMA, static_sandbox_resource_mgmt should # also be enabled for memory pre-allocation. +# +# When VFIO devices (e.g. GPUs) are cold-plugged and NUMA is enabled, the +# runtime validates that each device's host NUMA node is covered by the guest +# NUMA topology. A warning is logged if a device falls outside the configured +# nodes, indicating potential cross-NUMA memory access overhead. enable_numa = false # NUMA node mapping allows customizing how VM NUMA nodes map to host NUMA nodes.