Upgrade preparation to verify sysctl values containing forward slashes by regex

This commit is contained in:
Mengjiao Liu 2021-05-28 13:38:42 +08:00
parent bb24c265ce
commit 275d832ce2
11 changed files with 344 additions and 19 deletions

View File

@ -329,6 +329,20 @@ func usesHugePagesInProjectedEnv(item api.Container) bool {
return false
}
// hasSysctlsWithSlashNames returns true if the sysctl name contains a slash, otherwise it returns false
func hasSysctlsWithSlashNames(podSpec *api.PodSpec) bool {
if podSpec.SecurityContext == nil {
return false
}
securityContext := podSpec.SecurityContext
for _, s := range securityContext.Sysctls {
if strings.Contains(s.Name, "/") {
return true
}
}
return false
}
func checkContainerUseIndivisibleHugePagesValues(container api.Container) bool {
for resourceName, quantity := range container.Resources.Limits {
if helper.IsHugePageResourceName(resourceName) {
@ -420,6 +434,8 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
AllowExpandedDNSConfig: utilfeature.DefaultFeatureGate.Enabled(features.ExpandedDNSConfig) || haveSameExpandedDNSConfig(podSpec, oldPodSpec),
// Allow pod spec to use OS field
AllowOSField: utilfeature.DefaultFeatureGate.Enabled(features.IdentifyPodOS),
// The default sysctl value does not contain a forward slash, and in 1.24 we intend to relax this to be true by default
AllowSysctlRegexContainSlash: false,
}
if oldPodSpec != nil {
@ -440,6 +456,10 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
// if old spec used non-integer multiple of huge page unit size, we must allow it
opts.AllowIndivisibleHugePagesValues = usesIndivisibleHugePagesValues(oldPodSpec)
// if old spec used use relaxed validation for Update requests where the existing object's sysctl contains a slash, we must allow it.
opts.AllowSysctlRegexContainSlash = hasSysctlsWithSlashNames(oldPodSpec)
}
if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost {
// This is an update, so validate only if the existing object was valid.

View File

@ -3355,6 +3355,8 @@ type PodValidationOptions struct {
AllowExpandedDNSConfig bool
// Allow OSField to be set in the pod spec
AllowOSField bool
// Allow sysctl name to contain a slash
AllowSysctlRegexContainSlash bool
}
// validatePodMetadataAndSpec tests if required fields in the pod.metadata and pod.spec are set,
@ -3451,7 +3453,7 @@ func ValidatePodSpec(spec *core.PodSpec, podMeta *metav1.ObjectMeta, fldPath *fi
allErrs = append(allErrs, validateRestartPolicy(&spec.RestartPolicy, fldPath.Child("restartPolicy"))...)
allErrs = append(allErrs, validateDNSPolicy(&spec.DNSPolicy, fldPath.Child("dnsPolicy"))...)
allErrs = append(allErrs, unversionedvalidation.ValidateLabels(spec.NodeSelector, fldPath.Child("nodeSelector"))...)
allErrs = append(allErrs, ValidatePodSecurityContext(spec.SecurityContext, spec, fldPath, fldPath.Child("securityContext"))...)
allErrs = append(allErrs, ValidatePodSecurityContext(spec.SecurityContext, spec, fldPath, fldPath.Child("securityContext"), opts)...)
allErrs = append(allErrs, validateImagePullSecrets(spec.ImagePullSecrets, fldPath.Child("imagePullSecrets"))...)
allErrs = append(allErrs, validateAffinity(spec.Affinity, fldPath.Child("affinity"))...)
allErrs = append(allErrs, validatePodDNSConfig(spec.DNSConfig, &spec.DNSPolicy, fldPath.Child("dnsConfig"), opts)...)
@ -4006,29 +4008,48 @@ const (
// a sysctl name regex
SysctlFmt string = "(" + SysctlSegmentFmt + "\\.)*" + SysctlSegmentFmt
// a sysctl name regex with slash allowed
SysctlContainSlashFmt string = "(" + SysctlSegmentFmt + "[\\./])*" + SysctlSegmentFmt
// the maximal length of a sysctl name
SysctlMaxLength int = 253
)
var sysctlRegexp = regexp.MustCompile("^" + SysctlFmt + "$")
var sysctlContainSlashRegexp = regexp.MustCompile("^" + SysctlContainSlashFmt + "$")
// IsValidSysctlName checks that the given string is a valid sysctl name,
// i.e. matches SysctlFmt.
func IsValidSysctlName(name string) bool {
// i.e. matches SysctlFmt (or SysctlContainSlashFmt if canContainSlash is true).
// More info:
// https://man7.org/linux/man-pages/man8/sysctl.8.html
// https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func IsValidSysctlName(name string, canContainSlash bool) bool {
if len(name) > SysctlMaxLength {
return false
}
if canContainSlash {
return sysctlContainSlashRegexp.MatchString(name)
}
return sysctlRegexp.MatchString(name)
}
func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path) field.ErrorList {
func getSysctlFmt(canContainSlash bool) string {
if canContainSlash {
// use relaxed validation everywhere in 1.24
return SysctlContainSlashFmt
}
// Will be removed in 1.24
return SysctlFmt
}
func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path, allowSysctlRegexContainSlash bool) field.ErrorList {
allErrs := field.ErrorList{}
names := make(map[string]struct{})
for i, s := range sysctls {
if len(s.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
} else if !IsValidSysctlName(s.Name) {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, fmt.Sprintf("must have at most %d characters and match regex %s", SysctlMaxLength, SysctlFmt)))
} else if !IsValidSysctlName(s.Name, allowSysctlRegexContainSlash) {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, fmt.Sprintf("must have at most %d characters and match regex %s", SysctlMaxLength, getSysctlFmt(allowSysctlRegexContainSlash))))
} else if _, ok := names[s.Name]; ok {
allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name))
}
@ -4038,7 +4059,7 @@ func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path) field.ErrorList
}
// ValidatePodSecurityContext test that the specified PodSecurityContext has valid data.
func ValidatePodSecurityContext(securityContext *core.PodSecurityContext, spec *core.PodSpec, specPath, fldPath *field.Path) field.ErrorList {
func ValidatePodSecurityContext(securityContext *core.PodSecurityContext, spec *core.PodSpec, specPath, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
if securityContext != nil {
@ -4068,7 +4089,7 @@ func ValidatePodSecurityContext(securityContext *core.PodSecurityContext, spec *
}
if len(securityContext.Sysctls) != 0 {
allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"))...)
allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"), opts.AllowSysctlRegexContainSlash)...)
}
if securityContext.FSGroupChangePolicy != nil {

View File

@ -17765,13 +17765,35 @@ func TestIsValidSysctlName(t *testing.T) {
return string(x)
}(256),
}
containSlashesValid := []string{
"a/b/c/d",
"a/b.c",
}
containSlashesInvalid := []string{
"/",
"/a",
"a/abc*",
"a/b/*",
}
for _, s := range valid {
if !IsValidSysctlName(s) {
if !IsValidSysctlName(s, false) {
t.Errorf("%q expected to be a valid sysctl name", s)
}
}
for _, s := range invalid {
if IsValidSysctlName(s) {
if IsValidSysctlName(s, false) {
t.Errorf("%q expected to be an invalid sysctl name", s)
}
}
for _, s := range containSlashesValid {
if !IsValidSysctlName(s, true) {
t.Errorf("%q expected to be a valid sysctl name", s)
}
}
for _, s := range containSlashesInvalid {
if IsValidSysctlName(s, true) {
t.Errorf("%q expected to be an invalid sysctl name", s)
}
}
@ -17792,11 +17814,16 @@ func TestValidateSysctls(t *testing.T) {
"kernel.shmmax",
}
containSlashes := []string{
"net.ipv4.conf.enp3s0/200.forwarding",
"net/ipv4/conf/enp3s0.200/forwarding",
}
sysctls := make([]core.Sysctl, len(valid))
for i, sysctl := range valid {
sysctls[i].Name = sysctl
}
errs := validateSysctls(sysctls, field.NewPath("foo"))
errs := validateSysctls(sysctls, field.NewPath("foo"), false)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
@ -17805,7 +17832,7 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range invalid {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"))
errs = validateSysctls(sysctls, field.NewPath("foo"), false)
if len(errs) != 2 {
t.Errorf("expected 2 validation errors. Got: %v", errs)
} else {
@ -17821,12 +17848,21 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range duplicates {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"))
errs = validateSysctls(sysctls, field.NewPath("foo"), false)
if len(errs) != 1 {
t.Errorf("unexpected validation errors: %v", errs)
} else if errs[0].Type != field.ErrorTypeDuplicate {
t.Errorf("expected error type %v, got %v", field.ErrorTypeDuplicate, errs[0].Type)
}
sysctls = make([]core.Sysctl, len(containSlashes))
for i, sysctl := range containSlashes {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"), true)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
}
func newNodeNameEndpoint(nodeName string) *core.Endpoints {

View File

@ -359,13 +359,25 @@ const sysctlPatternSegmentFmt string = "([a-z0-9][-_a-z0-9]*)?[a-z0-9*]"
// SysctlPatternFmt is a regex used for matching valid sysctl patterns.
const SysctlPatternFmt string = "(" + apivalidation.SysctlSegmentFmt + "\\.)*" + sysctlPatternSegmentFmt
// SysctlContainSlashPatternFmt is a regex that contains a slash used for matching valid sysctl patterns.
const SysctlContainSlashPatternFmt string = "(" + apivalidation.SysctlSegmentFmt + "[\\./])*" + sysctlPatternSegmentFmt
var sysctlPatternRegexp = regexp.MustCompile("^" + SysctlPatternFmt + "$")
var sysctlContainSlashPatternRegexp = regexp.MustCompile("^" + SysctlContainSlashPatternFmt + "$")
// IsValidSysctlPattern checks if name is a valid sysctl pattern.
func IsValidSysctlPattern(name string) bool {
// i.e. matches sysctlPatternRegexp (or sysctlContainSlashPatternRegexp if canContainSlash is true).
// More info:
// https://man7.org/linux/man-pages/man8/sysctl.8.html
// https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func IsValidSysctlPattern(name string, canContainSlash bool) bool {
if len(name) > apivalidation.SysctlMaxLength {
return false
}
if canContainSlash {
return sysctlContainSlashPatternRegexp.MatchString(name)
}
return sysctlPatternRegexp.MatchString(name)
}
@ -422,7 +434,7 @@ func validatePodSecurityPolicySysctls(fldPath *field.Path, sysctls []string) fie
for i, s := range sysctls {
if len(s) == 0 {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i), sysctls[i], "empty sysctl not allowed"))
} else if !IsValidSysctlPattern(string(s)) {
} else if !IsValidSysctlPattern(string(s), false) {
allErrs = append(
allErrs,
field.Invalid(fldPath.Index(i), sysctls[i], fmt.Sprintf("must have at most %d characters and match regex %s",

View File

@ -834,12 +834,12 @@ func TestIsValidSysctlPattern(t *testing.T) {
}(256),
}
for _, s := range valid {
if !IsValidSysctlPattern(s) {
if !IsValidSysctlPattern(s, false) {
t.Errorf("%q expected to be a valid sysctl pattern", s)
}
}
for _, s := range invalid {
if IsValidSysctlPattern(s) {
if IsValidSysctlPattern(s, false) {
t.Errorf("%q expected to be an invalid sysctl pattern", s)
}
}

View File

@ -52,6 +52,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/metrics"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
"k8s.io/kubernetes/pkg/kubelet/sysctl"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/cache"
"k8s.io/kubernetes/pkg/kubelet/util/format"
@ -801,6 +802,17 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
metrics.StartedPodsTotal.Inc()
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
result.AddSyncResult(createSandboxResult)
// ConvertPodSysctlsVariableToDotsSeparator converts sysctl variable
// in the Pod.Spec.SecurityContext.Sysctls slice into a dot as a separator.
// runc uses the dot as the separator to verify whether the sysctl variable
// is correct in a separate namespace, so when using the slash as the sysctl
// variable separator, runc returns an error: "sysctl is not in a separate kernel namespace"
// and the podSandBox cannot be successfully created. Therefore, before calling runc,
// we need to convert the sysctl variable, the dot is used as a separator to separate the kernel namespace.
// When runc supports slash as sysctl separator, this function can no longer be used.
sysctl.ConvertPodSysctlsVariableToDotsSeparator(pod.Spec.SecurityContext)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
if err != nil {
// createPodSandbox can return an error from CNI, CSI,

View File

@ -533,6 +533,64 @@ func TestSyncPod(t *testing.T) {
}
}
func TestSyncPodWithConvertedPodSysctls(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
containers := []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
}
securityContext := &v1.PodSecurityContext{
Sysctls: []v1.Sysctl{
{
Name: "kernel/shm_rmid_forced",
Value: "1",
},
{
Name: "net/ipv4/ip_local_port_range",
Value: "1024 65535",
},
},
}
exceptSysctls := []v1.Sysctl{
{
Name: "kernel.shm_rmid_forced",
Value: "1",
},
{
Name: "net.ipv4.ip_local_port_range",
Value: "1024 65535",
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: containers,
SecurityContext: securityContext,
},
}
backOff := flowcontrol.NewBackOff(time.Second, time.Minute)
result := m.SyncPod(pod, &kubecontainer.PodStatus{}, []v1.Secret{}, backOff)
assert.NoError(t, result.Error())
assert.Equal(t, exceptSysctls, pod.Spec.SecurityContext.Sysctls)
for _, sandbox := range fakeRuntime.Sandboxes {
assert.Equal(t, runtimeapi.PodSandboxState_SANDBOX_READY, sandbox.State)
}
for _, c := range fakeRuntime.Containers {
assert.Equal(t, runtimeapi.ContainerState_CONTAINER_RUNNING, c.State)
}
}
func TestPruneInitContainers(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)

View File

@ -48,13 +48,14 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
}
for _, s := range patterns {
if !policyvalidation.IsValidSysctlPattern(s) {
if !policyvalidation.IsValidSysctlPattern(s, true) {
return nil, fmt.Errorf("sysctl %q must have at most %d characters and match regex %s",
s,
validation.SysctlMaxLength,
policyvalidation.SysctlPatternFmt,
policyvalidation.SysctlContainSlashPatternFmt,
)
}
s = convertSysctlVariableToDotsSeparator(s)
if strings.HasSuffix(s, "*") {
prefix := s[:len(s)-1]
ns := NamespacedBy(prefix)
@ -81,6 +82,7 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
// respective namespaces with the host. This check is only possible for sysctls on
// the static default allowlist, not those on the custom allowlist provided by the admin.
func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
sysctl = convertSysctlVariableToDotsSeparator(sysctl)
nsErrorFmt := "%q not allowed with host %s enabled"
if ns, found := w.sysctls[sysctl]; found {
if ns == ipcNamespace && hostIPC {

View File

@ -29,10 +29,12 @@ func TestNewAllowlist(t *testing.T) {
}
for _, test := range []Test{
{sysctls: []string{"kernel.msg*", "kernel.sem"}},
{sysctls: []string{"kernel/msg*", "kernel/sem"}},
{sysctls: []string{" kernel.msg*"}, err: true},
{sysctls: []string{"kernel.msg* "}, err: true},
{sysctls: []string{"net.-"}, err: true},
{sysctls: []string{"net.*.foo"}, err: true},
{sysctls: []string{"net.*/foo"}, err: true},
{sysctls: []string{"foo"}, err: true},
} {
_, err := NewAllowlist(append(sysctl.SafeSysctlAllowlist(), test.sysctls...))
@ -51,9 +53,11 @@ func TestAllowlist(t *testing.T) {
}
valid := []Test{
{sysctl: "kernel.shm_rmid_forced"},
{sysctl: "kernel/shm_rmid_forced"},
{sysctl: "net.ipv4.ip_local_port_range"},
{sysctl: "kernel.msgmax"},
{sysctl: "kernel.sem"},
{sysctl: "kernel/sem"},
}
invalid := []Test{
{sysctl: "kernel.shm_rmid_forced", hostIPC: true},

View File

@ -0,0 +1,63 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"strings"
"k8s.io/api/core/v1"
)
// convertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format.
// The '/' separator is also accepted in place of a '.'.
// Convert the sysctl variables to dots separator format for validation.
// More info:
// https://man7.org/linux/man-pages/man8/sysctl.8.html
// https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func convertSysctlVariableToDotsSeparator(val string) string {
if val == "" {
return val
}
firstSepIndex := strings.IndexAny(val, "./")
if firstSepIndex == -1 || val[firstSepIndex] == '.' {
return val
}
f := func(r rune) rune {
switch r {
case '.':
return '/'
case '/':
return '.'
}
return r
}
return strings.Map(f, val)
}
// ConvertPodSysctlsVariableToDotsSeparator converts sysctls variable in the Pod.Spec.SecurityContext.Sysctls slice into a dot as a separator
// according to the linux sysctl conversion rules.
// see https://man7.org/linux/man-pages/man5/sysctl.d.5.html for more details.
func ConvertPodSysctlsVariableToDotsSeparator(securityContext *v1.PodSecurityContext) {
if securityContext == nil {
return
}
for i, sysctl := range securityContext.Sysctls {
securityContext.Sysctls[i].Name = convertSysctlVariableToDotsSeparator(sysctl.Name)
}
return
}

View File

@ -0,0 +1,97 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"testing"
v1 "k8s.io/api/core/v1"
"github.com/stretchr/testify/assert"
)
// TestConvertSysctlVariableToDotsSeparator tests whether the sysctl variable
// can be correctly converted to a dot as a separator.
func TestConvertSysctlVariableToDotsSeparator(t *testing.T) {
type testCase struct {
in string
out string
}
valid := []testCase{
{in: "kernel.shm_rmid_forced", out: "kernel.shm_rmid_forced"},
{in: "kernel/shm_rmid_forced", out: "kernel.shm_rmid_forced"},
{in: "net.ipv4.conf.eno2/100.rp_filter", out: "net.ipv4.conf.eno2/100.rp_filter"},
{in: "net/ipv4/conf/eno2.100/rp_filter", out: "net.ipv4.conf.eno2/100.rp_filter"},
{in: "net/ipv4/ip_local_port_range", out: "net.ipv4.ip_local_port_range"},
{in: "kernel/msgmax", out: "kernel.msgmax"},
{in: "kernel/sem", out: "kernel.sem"},
}
for _, test := range valid {
convertSysctlVal := convertSysctlVariableToDotsSeparator(test.in)
assert.Equalf(t, test.out, convertSysctlVal, "The sysctl variable was not converted correctly. got: %s, want: %s", convertSysctlVal, test.out)
}
}
// TestConvertPodSysctlsVariableToDotsSeparator tests whether the sysctls variable
// can be correctly converted to a dot as a separator.
func TestConvertPodSysctlsVariableToDotsSeparator(t *testing.T) {
sysctls := []v1.Sysctl{
{
Name: "kernel.msgmax",
Value: "8192",
},
{
Name: "kernel.shm_rmid_forced",
Value: "1",
},
{
Name: "net.ipv4.conf.eno2/100.rp_filter",
Value: "1",
},
{
Name: "net/ipv4/ip_local_port_range",
Value: "1024 65535",
},
}
exceptSysctls := []v1.Sysctl{
{
Name: "kernel.msgmax",
Value: "8192",
},
{
Name: "kernel.shm_rmid_forced",
Value: "1",
},
{
Name: "net.ipv4.conf.eno2/100.rp_filter",
Value: "1",
},
{
Name: "net.ipv4.ip_local_port_range",
Value: "1024 65535",
},
}
securityContext := &v1.PodSecurityContext{
Sysctls: sysctls,
}
ConvertPodSysctlsVariableToDotsSeparator(securityContext)
assert.Equalf(t, securityContext.Sysctls, exceptSysctls, "The sysctls name was not converted correctly. got: %s, want: %s", securityContext.Sysctls, exceptSysctls)
}