diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index 61616f6ad3..776a7e02f3 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -94,6 +94,8 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon | | `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest | | `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) | +| `io.katacontainers.config.hypervisor.default_gpus` | uint32 | the minimum number of GPUs required for the VM. Only used by remote hypervisor to help with instance selection | +| `io.katacontainers.config.hypervisor.default_gpu_model` | string | the GPU model required for the VM. Only used by remote hypervisor to help with instance selection | ## Container Options | Key | Value Type | Comments | diff --git a/src/runtime/config/configuration-remote.toml.in b/src/runtime/config/configuration-remote.toml.in index 6e3651cf21..b89d804fda 100644 --- a/src/runtime/config/configuration-remote.toml.in +++ b/src/runtime/config/configuration-remote.toml.in @@ -38,7 +38,7 @@ remote_hypervisor_timeout = 600 # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" # Note: Remote hypervisor is only handling the following annotations -enable_annotations = ["machine_type", "default_memory", "default_vcpus", "image"] +enable_annotations = ["machine_type", "default_memory", "default_vcpus", "image", "default_gpus", "gpu_model"] # Optional space-separated list of options to pass to the guest kernel. 
# For example, use `kernel_params = "vsyscall=emulate"` if you are having diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index eaae4b6e7c..d0d743652d 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -560,6 +560,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, config.HypervisorConfig.Initdata = initdata } + if err := addHypervisorGPUOverrides(ocispec, config); err != nil { + return err + } + return nil } @@ -754,6 +758,26 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e }) } +func addHypervisorGPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if sbConfig.HypervisorType != vc.RemoteHypervisor { + return nil + } + + if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultGPUs).setUint(func(gpus uint64) { + sbConfig.HypervisorConfig.DefaultGPUs = uint32(gpus) + }); err != nil { + return err + } + + if value, ok := ocispec.Annotations[vcAnnotations.DefaultGPUModel]; ok { + if value != "" { + sbConfig.HypervisorConfig.DefaultGPUModel = value + } + } + + return nil +} + func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceDriver]; ok { supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm, config.VirtioBlockCCW} diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index f09d830032..c4c91cb590 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -775,6 +775,23 @@ func TestAddRemoteHypervisorAnnotations(t *testing.T) { err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.NoError(err) assert.Equal(sbConfig.HypervisorConfig.Initdata, "initdata") + + // When the GPU count annotation is specified, an invalid value is rejected and a valid value is stored in the hypervisor config + ocispec.Annotations[vcAnnotations.DefaultGPUs] = 
"-1" + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) + assert.Error(err) + + ocispec.Annotations[vcAnnotations.DefaultGPUs] = "1" + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) + assert.NoError(err) + assert.Equal(sbConfig.HypervisorConfig.DefaultGPUs, uint32(1)) + + // When the GPU model annotation is specified, its value is stored in the hypervisor config + ocispec.Annotations[vcAnnotations.DefaultGPUModel] = "tesla" + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) + assert.NoError(err) + assert.Equal(sbConfig.HypervisorConfig.DefaultGPUModel, "tesla") + } func TestAddProtectedHypervisorAnnotations(t *testing.T) { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 5eb922980b..cad5e85d74 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -673,6 +673,12 @@ type HypervisorConfig struct { // Initdata defines the initdata passed into guest when CreateVM Initdata string + + // GPU specific annotations (currently only applicable for Remote Hypervisor) + // DefaultGPUs specifies the number of GPUs required for the Kata VM + DefaultGPUs uint32 + // DefaultGPUModel specifies GPU model like tesla, h100, radeon etc. 
+ DefaultGPUModel string } // vcpu mapping from vcpu number to thread number diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index f046f38a7e..e71b0525c1 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -132,6 +132,12 @@ const ( // UseLegacySerial sets legacy serial device for guest console if available and implemented for architecture UseLegacySerial = kataAnnotHypervisorPrefix + "use_legacy_serial" + // GPU specific annotations used by remote hypervisor for instance selection + // DefaultGPUs is the number of GPUs required in the Kata VM + DefaultGPUs = kataAnnotHypervisorPrefix + "default_gpus" + // DefaultGPUModel is the GPU model required for the Kata VM, e.g. tesla, h100, radeon + DefaultGPUModel = kataAnnotHypervisorPrefix + "default_gpu_model" + // // CPU Annotations // diff --git a/src/runtime/virtcontainers/remote.go b/src/runtime/virtcontainers/remote.go index 047f09fe8c..4330042a09 100644 --- a/src/runtime/virtcontainers/remote.go +++ b/src/runtime/virtcontainers/remote.go @@ -81,6 +81,8 @@ func (rh *remoteHypervisor) CreateVM(ctx context.Context, id string, network Net annotations[hypannotations.DefaultVCPUs] = strconv.FormatUint(uint64(hypervisorConfig.NumVCPUs()), 10) annotations[hypannotations.DefaultMemory] = strconv.FormatUint(uint64(hypervisorConfig.MemorySize), 10) annotations[hypannotations.Initdata] = hypervisorConfig.Initdata + annotations[hypannotations.DefaultGPUs] = strconv.FormatUint(uint64(hypervisorConfig.DefaultGPUs), 10) + annotations[hypannotations.DefaultGPUModel] = hypervisorConfig.DefaultGPUModel req := &pb.CreateVMRequest{ Id: id,