virtcontainers: constrain docker container when sandbox_cgroup_only=true

The sandbox cgroup will be constrained if there is no container
type annotation; otherwise, kata will rely on the container engine's
cgroup configuration.

Depends-on: github.com/kata-containers/tests#2255

fixes #2408

Signed-off-by: Julio Montes <julio.montes@intel.com>
This commit is contained in:
Julio Montes 2020-01-22 22:45:17 +00:00
parent 54482f18df
commit c3cf98aca6
7 changed files with 53 additions and 12 deletions

View File

@ -223,9 +223,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

View File

@ -199,9 +199,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

View File

@ -325,9 +325,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

View File

@ -427,9 +427,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

View File

@ -422,9 +422,9 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

View File

@ -818,6 +818,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c
// Spec: &ocispec,
Experimental: runtime.Experimental,
HasCRIContainerType: HasCRIContainerType(ocispec.Annotations),
}
if err := addAnnotations(ocispec, &sandboxConfig); err != nil {
@ -986,3 +988,14 @@ func GetOCIConfig(status vc.ContainerStatus) (specs.Spec, error) {
return *status.Spec, nil
}
// HasCRIContainerType reports whether annotations contain at least one of
// the CRI container type keys listed in CRIContainerTypeKeyList. Only the
// presence of a key matters; the value stored under it is not inspected.
func HasCRIContainerType(annotations map[string]string) bool {
	found := false
	for _, criKey := range CRIContainerTypeKeyList {
		// Stop at the first key that is present in the annotations.
		if _, found = annotations[criKey]; found {
			break
		}
	}
	return found
}

View File

@ -122,6 +122,9 @@ type SandboxConfig struct {
DisableGuestSeccomp bool
// HasCRIContainerType specifies whether container type was set explicitly through annotations or not.
HasCRIContainerType bool
// Experimental features enabled
Experimental []exp.Feature
@ -2044,28 +2047,49 @@ func (s *Sandbox) setupSandboxCgroup() error {
return nil
}
s.Logger().WithField("hasCRIContainerType", s.config.HasCRIContainerType).Debug("Setting sandbox cgroup")
s.state.CgroupPath, err = validCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup)
if err != nil {
return fmt.Errorf("Invalid cgroup path: %v", err)
}
// Do not change current cgroup configuration.
// Create a spec without constraints
unconstraintSpec := specs.Spec{
// Don't modify original resources, create a copy
resources := *spec.Linux.Resources
sandboxSpec := specs.Spec{
Linux: &specs.Linux{
Resources: &specs.LinuxResources{},
CgroupsPath: s.state.CgroupPath,
Resources: &resources,
},
}
cmgr, err := newCgroupManager(s.config.Cgroups, s.state.CgroupPaths, &unconstraintSpec)
// kata should rely on the cgroup created and configured by
// container engine *only* if actual container was
// marked *explicitly* as sandbox through annotations.
if s.config.HasCRIContainerType {
// Do not change current cgroup configuration.
// Create a spec without constraints
sandboxSpec.Linux.Resources = &specs.LinuxResources{}
}
sandboxSpec.Linux.CgroupsPath = s.state.CgroupPath
// Remove this to improve device resource management, but first we need to fix some issues:
// - hypervisors will need access to following host's devices:
// * /dev/kvm
// * /dev/vhost-net
// - If devicemapper is the storage driver, hypervisor will need access to devicemapper devices:
// * The list of cgroup devices MUST BE updated when a new container is created in the POD
sandboxSpec.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{}
cmgr, err := newCgroupManager(s.config.Cgroups, s.state.CgroupPaths, &sandboxSpec)
if err != nil {
return fmt.Errorf("Could not create a new cgroup manager: %v", err)
}
runtimePid := os.Getpid()
// Add the runtime to the Kata sandbox cgroup
if err := cmgr.Apply(runtimePid); err != nil {
if err = cmgr.Apply(runtimePid); err != nil {
return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err)
}
@ -2078,6 +2102,10 @@ func (s *Sandbox) setupSandboxCgroup() error {
s.state.CgroupPaths = cmgr.GetPaths()
if err = cmgr.Set(&configs.Config{Cgroups: s.config.Cgroups}); err != nil {
return fmt.Errorf("Could not constrain cgroup: %v", err)
}
return nil
}