diff --git a/cli/config/configuration-acrn.toml.in b/cli/config/configuration-acrn.toml.in index 98d91082bb..d1c581243c 100644 --- a/cli/config/configuration-acrn.toml.in +++ b/cli/config/configuration-acrn.toml.in @@ -223,9 +223,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # if enabled, the runtime will add all the kata processes inside one dedicated cgroup. # The container cgroups in the host are not created, just one single cgroup per sandbox. -# The sandbox cgroup is not constrained by the runtime # The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. # See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ diff --git a/cli/config/configuration-clh.toml.in b/cli/config/configuration-clh.toml.in index 7692584c42..00185ea2e4 100644 --- a/cli/config/configuration-clh.toml.in +++ b/cli/config/configuration-clh.toml.in @@ -199,9 +199,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # if enabled, the runtime will add all the kata processes inside one dedicated cgroup. # The container cgroups in the host are not created, just one single cgroup per sandbox. -# The sandbox cgroup is not constrained by the runtime # The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. 
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ diff --git a/cli/config/configuration-fc.toml.in b/cli/config/configuration-fc.toml.in index 99d1a487eb..c9850d974b 100644 --- a/cli/config/configuration-fc.toml.in +++ b/cli/config/configuration-fc.toml.in @@ -325,9 +325,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # if enable, the runtime will add all the kata processes inside one dedicated cgroup. # The container cgroups in the host are not created, just one single cgroup per sandbox. -# The sandbox cgroup is not constrained by the runtime # The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. # See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ diff --git a/cli/config/configuration-qemu-virtiofs.toml.in b/cli/config/configuration-qemu-virtiofs.toml.in index a6e4030041..a38e0a0d19 100644 --- a/cli/config/configuration-qemu-virtiofs.toml.in +++ b/cli/config/configuration-qemu-virtiofs.toml.in @@ -427,9 +427,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # if enabled, the runtime will add all the kata processes inside one dedicated cgroup. # The container cgroups in the host are not created, just one single cgroup per sandbox. -# The sandbox cgroup is not constrained by the runtime # The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. 
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ diff --git a/cli/config/configuration-qemu.toml.in b/cli/config/configuration-qemu.toml.in index d87e5269ff..aa77755695 100644 --- a/cli/config/configuration-qemu.toml.in +++ b/cli/config/configuration-qemu.toml.in @@ -422,9 +422,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ # if enabled, the runtime will add all the kata processes inside one dedicated cgroup. # The container cgroups in the host are not created, just one single cgroup per sandbox. -# The sandbox cgroup is not constrained by the runtime # The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. # See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index 4cf6d5f100..3e01bea2ba 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -818,6 +818,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c // Spec: &ocispec, Experimental: runtime.Experimental, + + HasCRIContainerType: HasCRIContainerType(ocispec.Annotations), } if err := addAnnotations(ocispec, &sandboxConfig); err != nil { @@ -986,3 +988,14 @@ func GetOCIConfig(status vc.ContainerStatus) (specs.Spec, error) { return *status.Spec, nil } + +// HasCRIContainerType returns true if annotations contain +// a CRI container type annotation +func HasCRIContainerType(annotations map[string]string) bool { + for _, key := range CRIContainerTypeKeyList { + if _, ok := annotations[key]; ok { + return true + } + } + return false +} diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go 
index 3957cc9139..19caccc15c 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -122,12 +122,15 @@ type SandboxConfig struct { DisableGuestSeccomp bool + // HasCRIContainerType specifies whether container type was set explicitly through annotations or not. + HasCRIContainerType bool + // Experimental features enabled Experimental []exp.Feature // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available - Cgroups *configs.Cgroup `json:"cgroups"` + Cgroups *configs.Cgroup } func (s *Sandbox) trace(name string) (opentracing.Span, context.Context) { @@ -2044,28 +2047,49 @@ func (s *Sandbox) setupSandboxCgroup() error { return nil } + s.Logger().WithField("hasCRIContainerType", s.config.HasCRIContainerType).Debug("Setting sandbox cgroup") + s.state.CgroupPath, err = validCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup) if err != nil { return fmt.Errorf("Invalid cgroup path: %v", err) } - // Do not change current cgroup configuration. - // Create a spec without constraints - unconstraintSpec := specs.Spec{ + // Don't modify original resources, create a copy + resources := *spec.Linux.Resources + sandboxSpec := specs.Spec{ Linux: &specs.Linux{ - Resources: &specs.LinuxResources{}, - CgroupsPath: s.state.CgroupPath, + Resources: &resources, }, } - cmgr, err := newCgroupManager(s.config.Cgroups, s.state.CgroupPaths, &unconstraintSpec) + // kata should rely on the cgroup created and configured by + // container engine *only* if actual container was + // marked *explicitly* as sandbox through annotations. + if s.config.HasCRIContainerType { + // Do not change current cgroup configuration. 
+ // Create a spec without constraints + sandboxSpec.Linux.Resources = &specs.LinuxResources{} + } + + sandboxSpec.Linux.CgroupsPath = s.state.CgroupPath + + // Remove this to improve device resource management, but first we need to fix some issues: + // - hypervisors will need access to following host's devices: + // * /dev/kvm + // * /dev/vhost-net + // - If devicemapper is the storage driver, hypervisor will need access to devicemapper devices: + // * The list of cgroup devices MUST BE updated when a new container is created in the POD + sandboxSpec.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{} + + cmgr, err := newCgroupManager(s.config.Cgroups, s.state.CgroupPaths, &sandboxSpec) if err != nil { return fmt.Errorf("Could not create a new cgroup manager: %v", err) } runtimePid := os.Getpid() + // Add the runtime to the Kata sandbox cgroup - if err := cmgr.Apply(runtimePid); err != nil { + if err = cmgr.Apply(runtimePid); err != nil { return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) } @@ -2078,6 +2102,10 @@ func (s *Sandbox) setupSandboxCgroup() error { s.state.CgroupPaths = cmgr.GetPaths() + if err = cmgr.Set(&configs.Config{Cgroups: s.config.Cgroups}); err != nil { + return fmt.Errorf("Could not constrain cgroup: %v", err) + } + return nil }