diff --git a/go.mod b/go.mod index d515653a952..a756e376323 100644 --- a/go.mod +++ b/go.mod @@ -65,7 +65,7 @@ require ( github.com/onsi/ginkgo v1.14.0 github.com/onsi/gomega v1.10.1 github.com/opencontainers/go-digest v1.0.0 - github.com/opencontainers/runc v1.0.1 + github.com/opencontainers/runc v1.0.2 github.com/opencontainers/selinux v1.8.2 github.com/pkg/errors v0.9.1 github.com/pmezard/go-difflib v1.0.0 @@ -360,7 +360,7 @@ replace ( github.com/onsi/gomega => github.com/onsi/gomega v1.10.1 github.com/opencontainers/go-digest => github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.1 - github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.1 + github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.2 github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.8.2 github.com/opentracing/opentracing-go => github.com/opentracing/opentracing-go v1.1.0 diff --git a/go.sum b/go.sum index ce50b302caa..d161639cf2f 100644 --- a/go.sum +++ b/go.sum @@ -364,8 +364,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v1.0.1 h1:G18PGckGdAm3yVQRWDVQ1rLSLntiniKJ0cNRT2Tm5gs= -github.com/opencontainers/runc v1.0.1/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= +github.com/opencontainers/runc v1.0.2 h1:opHZMaswlyxz1OuGpBE53Dwe4/xF7EZTY0A2L/FpCOg= +github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.8.2 h1:c4ca10UMgRcvZ6h0K4HtS15UaVSBEaE+iln2LVpAuGc= diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index 4936f4b166b..230173690d5 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -379,7 +379,8 @@ func getSupportedUnifiedControllers() sets.String { func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { resources := &libcontainerconfigs.Resources{ - SkipDevices: true, + SkipDevices: true, + SkipFreezeOnSet: true, } if resourceConfig == nil { return resources diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go index 31c1c874ea0..7ca8b8a8d40 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go @@ -4,6 +4,7 @@ package fs import ( "bufio" + "errors" "fmt" "os" "strconv" @@ -11,6 +12,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" + "golang.org/x/sys/unix" ) type CpuGroup struct{} @@ -71,15 +73,33 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead) } } + + var period string if r.CpuPeriod != 0 { - if err := cgroups.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil { - return err + period = strconv.FormatUint(r.CpuPeriod, 10) + if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { + // Sometimes when the period to be set is smaller + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_period exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } else { + period = "" } } if r.CpuQuota != 0 { if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { return err } + if period != "" { + if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { + return err + } + } } return s.SetRtSched(path, r) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go index 3506c82746d..06c6c2e95a6 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go @@ -310,6 +310,14 @@ func getUnitName(c *configs.Cgroup) string { return c.Name } +// This code should be in sync with getUnitName. +func getUnitType(unitName string) string { + if strings.HasSuffix(unitName, ".slice") { + return "Slice" + } + return "Scope" +} + // isDbusError returns true if the error is a specific dbus error. func isDbusError(err error, name string) bool { if err != nil { @@ -388,10 +396,10 @@ func resetFailedUnit(cm *dbusConnManager, name string) { } } -func getUnitProperty(cm *dbusConnManager, unitName string, propertyName string) (*systemdDbus.Property, error) { +func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) { var prop *systemdDbus.Property err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) { - prop, Err = c.GetUnitPropertyContext(context.TODO(), unitName, propertyName) + prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName) return Err }) return prop, err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go index 1a8e1e3c6c1..cd4720c5d44 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go @@ -6,6 +6,7 @@ import ( "errors" "os" "path/filepath" + "reflect" "strings" "sync" @@ -345,6 +346,11 @@ func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) ( // Special case for SkipDevices, as used by Kubernetes to create pod // cgroups with allow-all device policy). if r.SkipDevices { + if r.SkipFreezeOnSet { + // Both needsFreeze and needsThaw are false. + return + } + // No need to freeze if SkipDevices is set, and either // (1) systemd unit does not (yet) exist, or // (2) it has DevicePolicy=auto and empty DeviceAllow list. @@ -353,15 +359,20 @@ func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) ( // a non-existent unit returns default properties, // and settings in (2) are the defaults. // - // Do not return errors from getUnitProperty, as they alone + // Do not return errors from getUnitTypeProperty, as they alone // should not prevent Set from working. - devPolicy, e := getUnitProperty(m.dbus, unitName, "DevicePolicy") + + unitType := getUnitType(unitName) + + devPolicy, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DevicePolicy") if e == nil && devPolicy.Value == dbus.MakeVariant("auto") { - devAllow, e := getUnitProperty(m.dbus, unitName, "DeviceAllow") - if e == nil && devAllow.Value == dbus.MakeVariant([]deviceAllowEntry{}) { - needsFreeze = false - needsThaw = false - return + devAllow, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DeviceAllow") + if e == nil { + if rv := reflect.ValueOf(devAllow.Value.Value()); rv.Kind() == reflect.Slice && rv.Len() == 0 { + needsFreeze = false + needsThaw = false + return + } } } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go index a1e7f0afd44..5ea9d940cef 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go @@ -131,4 +131,16 @@ type Resources struct { // // NOTE it is impossible to start a container which has this flag set. SkipDevices bool `json:"-"` + + // SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup + // freeze when setting resources. Only applicable to systemd legacy + // (i.e. cgroup v1) manager (which uses freeze by default to avoid + // spurious permission errors caused by systemd inability to update + // device rules in a non-disruptive manner). + // + // If not set, a few methods (such as looking into cgroup's + // devices.list and querying the systemd unit properties) are used + // during Set() to figure out whether the freeze is required. Those + // methods may be relatively slow, thus this flag. + SkipFreezeOnSet bool `json:"-"` } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go index b14d0ede3bb..f80118f2c9b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go @@ -67,7 +67,7 @@ func InitSeccomp(config *configs.Seccomp) error { if call == nil { return errors.New("encountered nil syscall while initializing Seccomp") } - if err := matchCall(filter, call); err != nil { + if err := matchCall(filter, call, defaultAction); err != nil { return err } } @@ -142,7 +142,7 @@ func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { } // Add a rule to match a single syscall -func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { +func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libseccomp.ScmpAction) error { if call == nil || filter == nil { return errors.New("cannot use nil as syscall to block") } @@ -151,6 +151,17 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { return errors.New("empty string is not a valid syscall") } + // Convert the call's action to the libseccomp equivalent + callAct, err := getAction(call.Action, call.ErrnoRet) + if err != nil { + return fmt.Errorf("action in seccomp profile is invalid: %w", err) + } + if callAct == defAct { + // This rule is redundant, silently skip it + // to avoid error from AddRule. + return nil + } + // If we can't resolve the syscall, assume it's not supported on this kernel // Ignore it, don't error out callNum, err := libseccomp.GetSyscallFromName(call.Name) @@ -158,12 +169,6 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { return nil } - // Convert the call's action to the libseccomp equivalent - callAct, err := getAction(call.Action, call.ErrnoRet) - if err != nil { - return fmt.Errorf("action in seccomp profile is invalid: %s", err) - } - // Unconditional match - just add the rule if len(call.Args) == 0 { if err := filter.AddRule(callNum, callAct); err != nil { diff --git a/vendor/modules.txt b/vendor/modules.txt index 90ce9e1e8df..e47b44c10e2 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -616,7 +616,7 @@ github.com/opencontainers/go-digest # github.com/opencontainers/image-spec v1.0.1 => github.com/opencontainers/image-spec v1.0.1 github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.0.1 => github.com/opencontainers/runc v1.0.1 +# github.com/opencontainers/runc v1.0.2 => github.com/opencontainers/runc v1.0.2 ## explicit github.com/opencontainers/runc/libcontainer github.com/opencontainers/runc/libcontainer/apparmor @@ -2606,7 +2606,7 @@ sigs.k8s.io/yaml # github.com/onsi/gomega => github.com/onsi/gomega v1.10.1 # github.com/opencontainers/go-digest => github.com/opencontainers/go-digest v1.0.0 # github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.1 -# github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.1 +# github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.2 # github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 # github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.8.2 # github.com/opentracing/opentracing-go => github.com/opentracing/opentracing-go v1.1.0