runtime: Add GPU annotations for remote hypervisor

Add GPU annotations for remote hypervisor to help with the right instance selection based on number of GPUs and model.

Signed-off-by: Pradipta Banerjee <pradipta.banerjee@gmail.com>
2025-08-02 00:02:01 +00:00 · 2024-10-23 13:22:59 -04:00 · 2024-10-23 13:22:59 -04:00 · 6f1ba007ed
commit 6f1ba007ed
parent 68225b53ca
7 changed files with 58 additions and 1 deletions
--- a/docs/how-to/how-to-set-sandbox-config-kata.md
+++ b/docs/how-to/how-to-set-sandbox-config-kata.md
@ -94,6 +94,8 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
 | `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
 | `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
 | `io.katacontainers.config.hypervisor.default_gpus` | uint32 | the minimum number of GPUs required for the VM. Only used by remote hypervisor to help with instance selection |
 | `io.katacontainers.config.hypervisor.default_gpu_model` | string | the GPU model required for the VM. Only used by remote hypervisor to help with instance selection |
 ## Container Options
 | Key | Value Type | Comments |
--- a/src/runtime/config/configuration-remote.toml.in
+++ b/src/runtime/config/configuration-remote.toml.in
@ -38,7 +38,7 @@ remote_hypervisor_timeout = 600
 # Each member of the list is a regular expression, which is the base name
 # of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
 # Note: Remote hypervisor is only handling the following annotations
-enable_annotations = ["machine_type", "default_memory", "default_vcpus", "image"]
+enable_annotations = ["machine_type", "default_memory", "default_vcpus", "image", "default_gpus", "default_gpu_model"]
 # Optional space-separated list of options to pass to the guest kernel.
 # For example, use `kernel_params = "vsyscall=emulate"` if you are having
--- a/src/runtime/pkg/oci/utils.go
+++ b/src/runtime/pkg/oci/utils.go
@ -560,6 +560,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
 		config.HypervisorConfig.Initdata = initdata
 	}
 	if err := addHypervisorGPUOverrides(ocispec, config); err != nil {
 		return err
 	}
 	return nil
 }
@ -754,6 +758,26 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e
 	})
 }
 // addHypervisorGPUOverrides copies the GPU-related annotations (GPU count and
 // GPU model) from the OCI spec into the sandbox's hypervisor configuration.
 // It is a no-op unless the sandbox uses the remote hypervisor. An error is
 // returned only when the GPU count annotation is rejected by setUint.
 func addHypervisorGPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
 	// GPU annotations are only honoured for the remote hypervisor.
 	if sbConfig.HypervisorType != vc.RemoteHypervisor {
 		return nil
 	}
 	// NOTE(review): the parsed value is narrowed from uint64 to uint32 here;
 	// confirm setUint already limits the accepted range to 32 bits.
 	storeGPUCount := func(count uint64) {
 		sbConfig.HypervisorConfig.DefaultGPUs = uint32(count)
 	}
 	gpuCountAnnotation := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultGPUs)
 	if err := gpuCountAnnotation.setUint(storeGPUCount); err != nil {
 		return err
 	}
 	// A missing key yields the empty string, so a single non-empty check
 	// covers both the "absent" and the "present but empty" cases.
 	if model := ocispec.Annotations[vcAnnotations.DefaultGPUModel]; model != "" {
 		sbConfig.HypervisorConfig.DefaultGPUModel = model
 	}
 	return nil
 }
 func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
 	if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceDriver]; ok {
 		supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm, config.VirtioBlockCCW}
--- a/src/runtime/pkg/oci/utils_test.go
+++ b/src/runtime/pkg/oci/utils_test.go
@ -775,6 +775,23 @@ func TestAddRemoteHypervisorAnnotations(t *testing.T) {
 	err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
 	assert.NoError(err)
 	assert.Equal(sbConfig.HypervisorConfig.Initdata, "initdata")
 	// An invalid (negative) GPU count annotation is rejected; a valid count is stored in the hypervisor config
 	ocispec.Annotations[vcAnnotations.DefaultGPUs] = "-1"
 	err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
 	assert.Error(err)
 	ocispec.Annotations[vcAnnotations.DefaultGPUs] = "1"
 	err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
 	assert.NoError(err)
 	assert.Equal(sbConfig.HypervisorConfig.DefaultGPUs, uint32(1))
 	// When the GPU model annotation is specified, it is stored in the hypervisor config
 	ocispec.Annotations[vcAnnotations.DefaultGPUModel] = "tesla"
 	err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
 	assert.NoError(err)
 	assert.Equal(sbConfig.HypervisorConfig.DefaultGPUModel, "tesla")
 }
 func TestAddProtectedHypervisorAnnotations(t *testing.T) {
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@ -673,6 +673,12 @@ type HypervisorConfig struct {
 	// Initdata defines the initdata passed into guest when CreateVM
 	Initdata string
 	// GPU specific annotations (currently only applicable for Remote Hypervisor)
 	// DefaultGPUs specifies the number of GPUs required for the Kata VM
 	DefaultGPUs uint32
 	// DefaultGPUModel specifies GPU model like tesla, h100, radeon etc.
 	DefaultGPUModel string
 }
 // vcpu mapping from vcpu number to thread number
--- a/src/runtime/virtcontainers/pkg/annotations/annotations.go
+++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go
@ -132,6 +132,12 @@ const (
 	// UseLegacySerial sets legacy serial device for guest console if available and implemented for architecture
 	UseLegacySerial = kataAnnotHypervisorPrefix + "use_legacy_serial"
 	// GPU specific annotations used by remote hypervisor for instance selection
 	// DefaultGPUs is the number of GPUs required in the Kata VM
 	DefaultGPUs = kataAnnotHypervisorPrefix + "default_gpus"
 	// DefaultGPUModel is the GPU model required in the Kata VM, e.g. tesla, h100, radeon
 	DefaultGPUModel = kataAnnotHypervisorPrefix + "default_gpu_model"
 	//
 	// CPU Annotations
 	//
--- a/src/runtime/virtcontainers/remote.go
+++ b/src/runtime/virtcontainers/remote.go
@ -81,6 +81,8 @@ func (rh *remoteHypervisor) CreateVM(ctx context.Context, id string, network Net
 	annotations[hypannotations.DefaultVCPUs] = strconv.FormatUint(uint64(hypervisorConfig.NumVCPUs()), 10)
 	annotations[hypannotations.DefaultMemory] = strconv.FormatUint(uint64(hypervisorConfig.MemorySize), 10)
 	annotations[hypannotations.Initdata] = hypervisorConfig.Initdata
 	annotations[hypannotations.DefaultGPUs] = strconv.FormatUint(uint64(hypervisorConfig.DefaultGPUs), 10)
 	annotations[hypannotations.DefaultGPUModel] = hypervisorConfig.DefaultGPUModel
 	req := &pb.CreateVMRequest{
 		Id:                   id,