valid error for creation and update from valid to invalid only

- using an option AllowNamespacedSysctlsForHostNetAndHostIPC

Signed-off-by: Paco Xu <paco.xu@daocloud.io>
This commit is contained in:
Paco Xu 2023-07-17 13:59:24 +08:00
parent 9a8ccdebc5
commit 36d6917ae1
10 changed files with 154 additions and 82 deletions

View File

@ -365,6 +365,7 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
AllowInvalidLabelValueInSelector: false, AllowInvalidLabelValueInSelector: false,
AllowInvalidTopologySpreadConstraintLabelSelector: false, AllowInvalidTopologySpreadConstraintLabelSelector: false,
AllowMutableNodeSelectorAndNodeAffinity: utilfeature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness), AllowMutableNodeSelectorAndNodeAffinity: utilfeature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
AllowNamespacedSysctlsForHostNetAndHostIPC: false,
} }
if oldPodSpec != nil { if oldPodSpec != nil {
@ -377,6 +378,17 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
opts.AllowInvalidLabelValueInSelector = hasInvalidLabelValueInAffinitySelector(oldPodSpec) opts.AllowInvalidLabelValueInSelector = hasInvalidLabelValueInAffinitySelector(oldPodSpec)
// if old spec has invalid labelSelector in topologySpreadConstraint, we must allow it // if old spec has invalid labelSelector in topologySpreadConstraint, we must allow it
opts.AllowInvalidTopologySpreadConstraintLabelSelector = hasInvalidTopologySpreadConstraintLabelSelector(oldPodSpec) opts.AllowInvalidTopologySpreadConstraintLabelSelector = hasInvalidTopologySpreadConstraintLabelSelector(oldPodSpec)
// if old spec has invalid sysctl with hostNet or hostIPC, we must allow it when update
if oldPodSpec.SecurityContext != nil && len(oldPodSpec.SecurityContext.Sysctls) != 0 {
for _, s := range oldPodSpec.SecurityContext.Sysctls {
err := apivalidation.ValidateHostSysctl(s.Name, oldPodSpec.SecurityContext, nil)
if err != nil {
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
break
}
}
}
} }
if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost { if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost {
// This is an update, so validate only if the existing object was valid. // This is an update, so validate only if the existing object was valid.

View File

@ -3813,6 +3813,8 @@ type PodValidationOptions struct {
AllowInvalidTopologySpreadConstraintLabelSelector bool AllowInvalidTopologySpreadConstraintLabelSelector bool
// Allow node selector additions for gated pods. // Allow node selector additions for gated pods.
AllowMutableNodeSelectorAndNodeAffinity bool AllowMutableNodeSelectorAndNodeAffinity bool
// Allow namespaced sysctls in hostNet and hostIPC pods
AllowNamespacedSysctlsForHostNetAndHostIPC bool
// The top-level resource being validated is a Pod, not just a PodSpec // The top-level resource being validated is a Pod, not just a PodSpec
// embedded in some other resource. // embedded in some other resource.
ResourceIsPod bool ResourceIsPod bool
@ -4563,10 +4565,10 @@ func IsValidSysctlName(name string) bool {
return sysctlContainSlashRegexp.MatchString(name) return sysctlContainSlashRegexp.MatchString(name)
} }
func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path, hostNetwork, hostIPC bool) field.ErrorList { func validateSysctls(securityContext *core.PodSecurityContext, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{} allErrs := field.ErrorList{}
names := make(map[string]struct{}) names := make(map[string]struct{})
for i, s := range sysctls { for i, s := range securityContext.Sysctls {
if len(s.Name) == 0 { if len(s.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), "")) allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
} else if !IsValidSysctlName(s.Name) { } else if !IsValidSysctlName(s.Name) {
@ -4574,41 +4576,27 @@ func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path, hostNetwork, ho
} else if _, ok := names[s.Name]; ok { } else if _, ok := names[s.Name]; ok {
allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name)) allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name))
} }
// The parameters hostNet and hostIPC are used to forbid sysctls for pod sharing the if !opts.AllowNamespacedSysctlsForHostNetAndHostIPC {
// respective namespaces with the host. err := ValidateHostSysctl(s.Name, securityContext, fldPath.Index(i).Child("name"))
if !isValidSysctlWithHostNetIPC(s.Name, hostNetwork, hostIPC) { if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, "sysctl not allowed with host net/ipc enabled")) allErrs = append(allErrs, err)
}
} }
names[s.Name] = struct{}{} names[s.Name] = struct{}{}
} }
return allErrs return allErrs
} }
func isValidSysctlWithHostNetIPC(sysctl string, hostNet, hostIPC bool) bool { // ValidateHostSysctl will return error if namespaced sysctls is applied to pod sharing the respective namespaces with the host.
sysctl = utilsysctl.ConvertSysctlVariableToDotsSeparator(sysctl) func ValidateHostSysctl(sysctl string, securityContext *core.PodSecurityContext, fldPath *field.Path) *field.Error {
var ns utilsysctl.Namespace ns, _, _ := utilsysctl.GetNamespace(sysctl)
if strings.HasSuffix(sysctl, "*") { switch {
prefix := sysctl[:len(sysctl)-1] case securityContext.HostNetwork && ns == utilsysctl.NetNamespace:
ns = utilsysctl.NamespacedBy(prefix) return field.Invalid(fldPath, sysctl, "may not be specified when 'hostNetwork' is true")
if ns == utilsysctl.UnknownNamespace { case securityContext.HostIPC && ns == utilsysctl.IPCNamespace:
// don't handle unknown namespace here return field.Invalid(fldPath, sysctl, "may not be specified when 'hostIPC' is true")
return true
} }
} else { return nil
ns = utilsysctl.NamespacedBy(sysctl)
if ns == utilsysctl.UnknownNamespace {
// don't handle unknown namespace here
return true
}
}
if ns == utilsysctl.IpcNamespace && hostIPC {
return false
}
if ns == utilsysctl.NetNamespace && hostNet {
return false
}
return true
} }
// validatePodSpecSecurityContext verifies the SecurityContext of a PodSpec, // validatePodSpecSecurityContext verifies the SecurityContext of a PodSpec,
@ -4643,13 +4631,7 @@ func validatePodSpecSecurityContext(securityContext *core.PodSecurityContext, sp
} }
if len(securityContext.Sysctls) != 0 { if len(securityContext.Sysctls) != 0 {
var hostNetwork, hostIPC bool allErrs = append(allErrs, validateSysctls(securityContext, fldPath.Child("sysctls"), opts)...)
if spec.SecurityContext != nil {
hostNetwork = spec.SecurityContext.HostNetwork
hostIPC = spec.SecurityContext.HostIPC
}
allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"), hostNetwork, hostIPC)...)
} }
if securityContext.FSGroupChangePolicy != nil { if securityContext.FSGroupChangePolicy != nil {

View File

@ -21505,12 +21505,18 @@ func TestValidateSysctls(t *testing.T) {
"kernel.shmmax", "kernel.shmmax",
"kernel.shmmax", "kernel.shmmax",
} }
opts := PodValidationOptions{
AllowNamespacedSysctlsForHostNetAndHostIPC: false,
}
sysctls := make([]core.Sysctl, len(valid)) sysctls := make([]core.Sysctl, len(valid))
validSecurityContext := &core.PodSecurityContext{
Sysctls: sysctls,
}
for i, sysctl := range valid { for i, sysctl := range valid {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs := validateSysctls(sysctls, field.NewPath("foo"), false, false) errs := validateSysctls(validSecurityContext, field.NewPath("foo"), opts)
if len(errs) != 0 { if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs) t.Errorf("unexpected validation errors: %v", errs)
} }
@ -21519,7 +21525,10 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range invalid { for i, sysctl := range invalid {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs = validateSysctls(sysctls, field.NewPath("foo"), false, false) inValidSecurityContext := &core.PodSecurityContext{
Sysctls: sysctls,
}
errs = validateSysctls(inValidSecurityContext, field.NewPath("foo"), opts)
if len(errs) != 2 { if len(errs) != 2 {
t.Errorf("expected 2 validation errors. Got: %v", errs) t.Errorf("expected 2 validation errors. Got: %v", errs)
} else { } else {
@ -21535,7 +21544,10 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range duplicates { for i, sysctl := range duplicates {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs = validateSysctls(sysctls, field.NewPath("foo"), false, false) securityContextWithDup := &core.PodSecurityContext{
Sysctls: sysctls,
}
errs = validateSysctls(securityContextWithDup, field.NewPath("foo"), opts)
if len(errs) != 1 { if len(errs) != 1 {
t.Errorf("unexpected validation errors: %v", errs) t.Errorf("unexpected validation errors: %v", errs)
} else if errs[0].Type != field.ErrorTypeDuplicate { } else if errs[0].Type != field.ErrorTypeDuplicate {
@ -21546,19 +21558,40 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range invalidWithHostNet { for i, sysctl := range invalidWithHostNet {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs = validateSysctls(sysctls, field.NewPath("foo"), true, false) invalidSecurityContextWithHostNet := &core.PodSecurityContext{
Sysctls: sysctls,
HostIPC: false,
HostNetwork: true,
}
errs = validateSysctls(invalidSecurityContextWithHostNet, field.NewPath("foo"), opts)
if len(errs) != 2 { if len(errs) != 2 {
t.Errorf("unexpected validation errors: %v", errs) t.Errorf("unexpected validation errors: %v", errs)
} }
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
errs = validateSysctls(invalidSecurityContextWithHostNet, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
sysctls = make([]core.Sysctl, len(invalidWithHostIPC)) sysctls = make([]core.Sysctl, len(invalidWithHostIPC))
for i, sysctl := range invalidWithHostIPC { for i, sysctl := range invalidWithHostIPC {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs = validateSysctls(sysctls, field.NewPath("foo"), false, true) invalidSecurityContextWithHostIPC := &core.PodSecurityContext{
Sysctls: sysctls,
HostIPC: true,
HostNetwork: false,
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = false
errs = validateSysctls(invalidSecurityContextWithHostIPC, field.NewPath("foo"), opts)
if len(errs) != 2 { if len(errs) != 2 {
t.Errorf("unexpected validation errors: %v", errs) t.Errorf("unexpected validation errors: %v", errs)
} }
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
errs = validateSysctls(invalidSecurityContextWithHostIPC, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
} }
func newNodeNameEndpoint(nodeName string) *core.Endpoints { func newNodeNameEndpoint(nodeName string) *core.Endpoints {

View File

@ -55,20 +55,14 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
policyvalidation.SysctlContainSlashPatternFmt, policyvalidation.SysctlContainSlashPatternFmt,
) )
} }
s = utilsysctl.ConvertSysctlVariableToDotsSeparator(s) ns, sysctlOrPrefix, prefixed := utilsysctl.GetNamespace(s)
if strings.HasSuffix(s, "*") {
prefix := s[:len(s)-1]
ns := utilsysctl.NamespacedBy(prefix)
if ns == utilsysctl.UnknownNamespace { if ns == utilsysctl.UnknownNamespace {
return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s) return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", sysctlOrPrefix)
} }
w.prefixes[prefix] = ns if prefixed {
w.prefixes[sysctlOrPrefix] = ns
} else { } else {
ns := utilsysctl.NamespacedBy(s) w.sysctls[sysctlOrPrefix] = ns
if ns == utilsysctl.UnknownNamespace {
return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
}
w.sysctls[s] = ns
} }
} }
return w, nil return w, nil
@ -82,10 +76,10 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
// respective namespaces with the host. This check is only possible for sysctls on // respective namespaces with the host. This check is only possible for sysctls on
// the static default allowlist, not those on the custom allowlist provided by the admin. // the static default allowlist, not those on the custom allowlist provided by the admin.
func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool) error { func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
sysctl = utilsysctl.ConvertSysctlVariableToDotsSeparator(sysctl) sysctl = utilsysctl.NormalizeName(sysctl)
nsErrorFmt := "%q not allowed with host %s enabled" nsErrorFmt := "%q not allowed with host %s enabled"
if ns, found := w.sysctls[sysctl]; found { if ns, found := w.sysctls[sysctl]; found {
if ns == utilsysctl.IpcNamespace && hostIPC { if ns == utilsysctl.IPCNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns) return fmt.Errorf(nsErrorFmt, sysctl, ns)
} }
if ns == utilsysctl.NetNamespace && hostNet { if ns == utilsysctl.NetNamespace && hostNet {
@ -95,7 +89,7 @@ func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool)
} }
for p, ns := range w.prefixes { for p, ns := range w.prefixes {
if strings.HasPrefix(sysctl, p) { if strings.HasPrefix(sysctl, p) {
if ns == utilsysctl.IpcNamespace && hostIPC { if ns == utilsysctl.IPCNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns) return fmt.Errorf(nsErrorFmt, sysctl, ns)
} }
if ns == utilsysctl.NetNamespace && hostNet { if ns == utilsysctl.NetNamespace && hostNet {

View File

@ -1,3 +1,6 @@
//go:build linux
// +build linux
/* /*
Copyright 2016 The Kubernetes Authors. Copyright 2016 The Kubernetes Authors.
@ -17,9 +20,10 @@ limitations under the License.
package sysctl package sysctl
import ( import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"testing" "testing"
v1 "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
) )
func TestNewAllowlist(t *testing.T) { func TestNewAllowlist(t *testing.T) {

View File

@ -29,7 +29,7 @@ func ConvertPodSysctlsVariableToDotsSeparator(securityContext *v1.PodSecurityCon
return return
} }
for i, sysctl := range securityContext.Sysctls { for i, sysctl := range securityContext.Sysctls {
securityContext.Sysctls[i].Name = utilsysctl.ConvertSysctlVariableToDotsSeparator(sysctl.Name) securityContext.Sysctls[i].Name = utilsysctl.NormalizeName(sysctl.Name)
} }
return return
} }

View File

@ -24,37 +24,81 @@ import (
type Namespace string type Namespace string
const ( const (
// refer to https://man7.org/linux/man-pages/man7/ipc_namespaces.7.html
// the Linux IPC namespace // the Linux IPC namespace
IpcNamespace = Namespace("ipc") IPCNamespace = Namespace("IPC")
// refer to https://man7.org/linux/man-pages/man7/network_namespaces.7.html
// the network namespace // the network namespace
NetNamespace = Namespace("net") NetNamespace = Namespace("Net")
// the zero value if no namespace is known // the zero value if no namespace is known
UnknownNamespace = Namespace("") UnknownNamespace = Namespace("")
) )
var namespaces = map[string]Namespace{ var nameToNamespace = map[string]Namespace{
"kernel.sem": IpcNamespace, // kernel semaphore parameters: SEMMSL, SEMMNS, SEMOPM, and SEMMNI.
"kernel.sem": IPCNamespace,
// kernel shared memory limits include shmall, shmmax, shmmni, and shm_rmid_forced.
"kernel.shmall": IPCNamespace,
"kernel.shmmax": IPCNamespace,
"kernel.shmmni": IPCNamespace,
"kernel.shm_rmid_forced": IPCNamespace,
// make backward compatibility to know the namespace of kernel.shm*
"kernel.shm": IPCNamespace,
// kernel messages include msgmni, msgmax and msgmnb.
"kernel.msgmax": IPCNamespace,
"kernel.msgmnb": IPCNamespace,
"kernel.msgmni": IPCNamespace,
// make backward compatibility to know the namespace of kernel.msg*
"kernel.msg": IPCNamespace,
} }
var prefixNamespaces = map[string]Namespace{ var prefixToNamespace = map[string]Namespace{
"kernel.shm": IpcNamespace, "net": NetNamespace,
"kernel.msg": IpcNamespace, // mqueue filesystem provides the necessary kernel features to enable the creation
"fs.mqueue.": IpcNamespace, // of a user space library that implements the POSIX message queues API.
"net.": NetNamespace, "fs.mqueue": IPCNamespace,
} }
// NamespacedBy returns the namespace of the Linux kernel for a sysctl, or // namespaceOf returns the namespace of the Linux kernel for a sysctl, or
// unknownNamespace if the sysctl is not known to be namespaced. // unknownNamespace if the sysctl is not known to be namespaced.
func NamespacedBy(val string) Namespace { // The second return is prefixed bool.
if ns, found := namespaces[val]; found { // It returns true if the key is prefixed with a key in the prefix map
func namespaceOf(val string) Namespace {
if ns, found := nameToNamespace[val]; found {
return ns return ns
} }
for p, ns := range prefixNamespaces { for p, ns := range prefixToNamespace {
if strings.HasPrefix(val, p) { if strings.HasPrefix(val, p+".") {
return ns return ns
} }
} }
return UnknownNamespace return UnknownNamespace
} }
// GetNamespace extracts information from a sysctl string. It returns:
// 1. The sysctl namespace, which can be one of the following: IPC, Net, or unknown.
// 2. sysctlOrPrefix: the prefix of the sysctl parameter until the first '*'.
// If there is no '*', it will be the original string.
// 3. 'prefixed' is set to true if the sysctl parameter contains '*' or it is in the prefixToNamespace key list, in most cases, it is a suffix *.
//
// For example, if the input sysctl is 'net.ipv6.neigh.*', GetNamespace will return:
// - The Net namespace
// - The sysctlOrPrefix as 'net.ipv6.neigh'
// - 'prefixed' set to true
//
// For the input sysctl 'net.ipv6.conf.all.disable_ipv6', GetNamespace will return:
// - The Net namespace
// - The sysctlOrPrefix as 'net.ipv6.conf.all.disable_ipv6'
// - 'prefixed' set to false.
func GetNamespace(sysctl string) (ns Namespace, sysctlOrPrefix string, prefixed bool) {
sysctlOrPrefix = NormalizeName(sysctl)
firstIndex := strings.IndexAny(sysctlOrPrefix, "*")
if firstIndex != -1 {
sysctlOrPrefix = sysctlOrPrefix[:firstIndex]
prefixed = true
}
ns = namespaceOf(sysctlOrPrefix)
return
}

View File

@ -20,16 +20,16 @@ import (
"testing" "testing"
) )
func TestNamespacedBy(t *testing.T) { func TestNamespacedOf(t *testing.T) {
tests := map[string]Namespace{ tests := map[string]Namespace{
"kernel.shm_rmid_forced": IpcNamespace, "kernel.shm_rmid_forced": IPCNamespace,
"net.a.b.c": NetNamespace, "net.a.b.c": NetNamespace,
"fs.mqueue.a.b.c": IpcNamespace, "fs.mqueue.a.b.c": IPCNamespace,
"foo": UnknownNamespace, "foo": UnknownNamespace,
} }
for sysctl, ns := range tests { for sysctl, ns := range tests {
if got := NamespacedBy(sysctl); got != ns { if got := namespaceOf(sysctl); got != ns {
t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns) t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns)
} }
} }

View File

@ -99,22 +99,25 @@ func (*procSysctl) SetSysctl(sysctl string, newVal int) error {
return os.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640) return os.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640)
} }
// ConvertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format. // NormalizeName can return sysctl variables in dots separator format.
// The '/' separator is also accepted in place of a '.'. // The '/' separator is also accepted in place of a '.'.
// Convert the sysctl variables to dots separator format for validation. // Convert the sysctl variables to dots separator format for validation.
// More info: // More info:
// //
// https://man7.org/linux/man-pages/man8/sysctl.8.html // https://man7.org/linux/man-pages/man8/sysctl.8.html
// https://man7.org/linux/man-pages/man5/sysctl.d.5.html // https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func ConvertSysctlVariableToDotsSeparator(val string) string { func NormalizeName(val string) string {
if val == "" { if val == "" {
return val return val
} }
firstSepIndex := strings.IndexAny(val, "./") firstSepIndex := strings.IndexAny(val, "./")
// if the first found is `.` like `net.ipv4.conf.eno2/100.rp_filter`
if firstSepIndex == -1 || val[firstSepIndex] == '.' { if firstSepIndex == -1 || val[firstSepIndex] == '.' {
return val return val
} }
// for `net/ipv4/conf/eno2.100/rp_filter`, swap the use of `.` and `/`
// to `net.ipv4.conf.eno2/100.rp_filter`
f := func(r rune) rune { f := func(r rune) rune {
switch r { switch r {
case '.': case '.':

View File

@ -40,7 +40,7 @@ func TestConvertSysctlVariableToDotsSeparator(t *testing.T) {
} }
for _, test := range valid { for _, test := range valid {
convertSysctlVal := ConvertSysctlVariableToDotsSeparator(test.in) convertSysctlVal := NormalizeName(test.in)
assert.Equalf(t, test.out, convertSysctlVal, "The sysctl variable was not converted correctly. got: %s, want: %s", convertSysctlVal, test.out) assert.Equalf(t, test.out, convertSysctlVal, "The sysctl variable was not converted correctly. got: %s, want: %s", convertSysctlVal, test.out)
} }
} }