diff --git a/src/runtime/pkg/containerd-shim-v2/create.go b/src/runtime/pkg/containerd-shim-v2/create.go index c272c89d4e..ab5afd2a07 100644 --- a/src/runtime/pkg/containerd-shim-v2/create.go +++ b/src/runtime/pkg/containerd-shim-v2/create.go @@ -112,19 +112,16 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con if s.sandbox != nil { return nil, fmt.Errorf("cannot create another sandbox in sandbox: %s", s.sandbox.ID()) } + // Here we deal with CDI devices that are cold-plugged (k8s) and + // for the single_container (nerdctl, podman, ...) use-case. // We can provide additional directories where to search for // CDI specs if needed. immutable OS's only have specific // directories where applications can write too. For instance /opt/cdi // // _, err = withCDI(ociSpec.Annotations, []string{"/opt/cdi"}, ociSpec) - // - // Only inject CDI devices if single_container we do not want - // CDI devices in the pod_sandbox - if containerType == vc.SingleContainer { - _, err = config.WithCDI(ociSpec.Annotations, []string{}, ociSpec) - if err != nil { - return nil, fmt.Errorf("adding CDI devices failed: %w", err) - } + _, err = config.WithCDI(ociSpec.Annotations, []string{}, ociSpec) + if err != nil { + return nil, fmt.Errorf("adding CDI devices failed: %w", err) } s.config = runtimeConfig diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index d2f6d89288..42bcccff8a 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -697,7 +697,14 @@ func WithCDI(annotations map[string]string, cdiSpecDirs []string, spec *specs.Sp if _, err := registry.InjectDevices(spec, devsFromAnnotations...); err != nil { return nil, fmt.Errorf("CDI device injection failed: %w", err) } - + // Once we injected the device into the ociSpec we do not need to CDI + // device annotation from the outer runtime. The runtime will create the + // appropriate inner runtime CDI annotation dependent on the device. + for key := range spec.Annotations { + if strings.HasPrefix(key, cdi.AnnotationPrefix) { + delete(spec.Annotations, key) + } + } // One crucial thing to keep in mind is that CDI device injection // might add OCI Spec environment variables, hooks, and mounts as // well. Therefore it is important that none of the corresponding diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 536fbcff13..b736229f5e 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -842,6 +842,20 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error { coldPlugDevices := []config.DeviceInfo{} for i, vfio := range deviceInfos { + // If device is already attached during sandbox creation, e.g. + // with an CDI annotation, skip it in the container creation and + // only create the proper CDI annotation for the kata-agent + for _, dev := range config.PCIeDevicesPerPort["root-port"] { + if dev.HostPath == vfio.ContainerPath { + c.Logger().Warnf("device %s already attached to the sandbox, skipping", vfio.ContainerPath) + } + } + for _, dev := range config.PCIeDevicesPerPort["switch-port"] { + if dev.HostPath == vfio.ContainerPath { + c.Logger().Warnf("device %s already attached to the sandbox, skipping", vfio.ContainerPath) + } + } + // Only considering VFIO updates for Port and ColdPlug or // HotPlug updates isVFIODevice := deviceManager.IsVFIODevice(vfio.ContainerPath) diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index b8b2fb5404..d0b560a922 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -723,79 +723,7 @@ func (s *Sandbox) coldOrHotPlugVFIO(sandboxConfig *SandboxConfig) (bool, error) // for correct number of PCIe root ports. var vhostUserBlkDevices []config.DeviceInfo - //io.katacontainers.pkg.oci.container_type:pod_sandbox - for cnt, container := range sandboxConfig.Containers { - // Do not alter the original spec, we do not want to inject - // CDI devices into the sandbox container, were using the CDI - // devices as additional information to determine the number of - // PCIe root ports to reserve for the hypervisor. - // A single_container type will have the CDI devices injected - // only do this if we're a pod_sandbox type. - if container.Annotations["io.katacontainers.pkg.oci.container_type"] == "pod_sandbox" && container.CustomSpec != nil { - cdiSpec := container.CustomSpec - // We can provide additional directories where to search for - // CDI specs if needed. immutable OS's only have specific - // directories where applications can write too. For instance /opt/cdi - // - // _, err = withCDI(ociSpec.Annotations, []string{"/opt/cdi"}, ociSpec) - // - _, err := config.WithCDI(cdiSpec.Annotations, []string{}, cdiSpec) - if err != nil { - return coldPlugVFIO, fmt.Errorf("adding CDI devices failed: %w", err) - } - - for _, dev := range cdiSpec.Linux.Devices { - isVFIODevice := deviceManager.IsVFIODevice(dev.Path) - if hotPlugVFIO && isVFIODevice { - vfioDev := config.DeviceInfo{ - ColdPlug: true, - ContainerPath: dev.Path, - Port: sandboxConfig.HypervisorConfig.HotPlugVFIO, - DevType: dev.Type, - Major: dev.Major, - Minor: dev.Minor, - } - if dev.FileMode != nil { - vfioDev.FileMode = *dev.FileMode - } - if dev.UID != nil { - vfioDev.UID = *dev.UID - } - if dev.GID != nil { - vfioDev.GID = *dev.GID - } - - vfioDevices = append(vfioDevices, vfioDev) - continue - } - if coldPlugVFIO && isVFIODevice { - vfioDev := config.DeviceInfo{ - ColdPlug: true, - ContainerPath: dev.Path, - Port: sandboxConfig.HypervisorConfig.ColdPlugVFIO, - DevType: dev.Type, - Major: dev.Major, - Minor: dev.Minor, - } - if dev.FileMode != nil { - vfioDev.FileMode = *dev.FileMode - } - if dev.UID != nil { - vfioDev.UID = *dev.UID - } - if dev.GID != nil { - vfioDev.GID = *dev.GID - } - - vfioDevices = append(vfioDevices, vfioDev) - continue - } - } - } - // As stated before the single_container will have the CDI - // devices injected by the runtime. For the pod_container use-case - // see container.go how cold and hot-plug are handled. for dev, device := range container.DeviceInfos { if deviceManager.IsVhostUserBlk(device) { vhostUserBlkDevices = append(vhostUserBlkDevices, device)