Merge pull request #118705 from pacoxu/hostnet-sysctl-rejects

forbid sysctls for pod sharing the respective namespaces with the host
This commit is contained in:
Kubernetes Prow Robot 2023-10-28 09:48:44 +02:00 committed by GitHub
commit 57c7b666c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 316 additions and 152 deletions

View File

@ -365,6 +365,7 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
AllowInvalidLabelValueInSelector: false, AllowInvalidLabelValueInSelector: false,
AllowInvalidTopologySpreadConstraintLabelSelector: false, AllowInvalidTopologySpreadConstraintLabelSelector: false,
AllowMutableNodeSelectorAndNodeAffinity: utilfeature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness), AllowMutableNodeSelectorAndNodeAffinity: utilfeature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
AllowNamespacedSysctlsForHostNetAndHostIPC: false,
} }
if oldPodSpec != nil { if oldPodSpec != nil {
@ -377,6 +378,17 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po
opts.AllowInvalidLabelValueInSelector = hasInvalidLabelValueInAffinitySelector(oldPodSpec) opts.AllowInvalidLabelValueInSelector = hasInvalidLabelValueInAffinitySelector(oldPodSpec)
// if old spec has invalid labelSelector in topologySpreadConstraint, we must allow it // if old spec has invalid labelSelector in topologySpreadConstraint, we must allow it
opts.AllowInvalidTopologySpreadConstraintLabelSelector = hasInvalidTopologySpreadConstraintLabelSelector(oldPodSpec) opts.AllowInvalidTopologySpreadConstraintLabelSelector = hasInvalidTopologySpreadConstraintLabelSelector(oldPodSpec)
// if the old spec has a sysctl that is invalid with hostNet or hostIPC, we must keep allowing it on update
if oldPodSpec.SecurityContext != nil && len(oldPodSpec.SecurityContext.Sysctls) != 0 {
for _, s := range oldPodSpec.SecurityContext.Sysctls {
err := apivalidation.ValidateHostSysctl(s.Name, oldPodSpec.SecurityContext, nil)
if err != nil {
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
break
}
}
}
} }
if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost { if oldPodMeta != nil && !opts.AllowInvalidPodDeletionCost {
// This is an update, so validate only if the existing object was valid. // This is an update, so validate only if the existing object was valid.

View File

@ -44,6 +44,7 @@ import (
"k8s.io/apimachinery/pkg/util/validation" "k8s.io/apimachinery/pkg/util/validation"
"k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/validation/field"
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
utilsysctl "k8s.io/component-helpers/node/util/sysctl"
schedulinghelper "k8s.io/component-helpers/scheduling/corev1" schedulinghelper "k8s.io/component-helpers/scheduling/corev1"
kubeletapis "k8s.io/kubelet/pkg/apis" kubeletapis "k8s.io/kubelet/pkg/apis"
apiservice "k8s.io/kubernetes/pkg/api/service" apiservice "k8s.io/kubernetes/pkg/api/service"
@ -3812,6 +3813,8 @@ type PodValidationOptions struct {
AllowInvalidTopologySpreadConstraintLabelSelector bool AllowInvalidTopologySpreadConstraintLabelSelector bool
// Allow node selector additions for gated pods. // Allow node selector additions for gated pods.
AllowMutableNodeSelectorAndNodeAffinity bool AllowMutableNodeSelectorAndNodeAffinity bool
// Allow namespaced sysctls in hostNet and hostIPC pods
AllowNamespacedSysctlsForHostNetAndHostIPC bool
// The top-level resource being validated is a Pod, not just a PodSpec // The top-level resource being validated is a Pod, not just a PodSpec
// embedded in some other resource. // embedded in some other resource.
ResourceIsPod bool ResourceIsPod bool
@ -4562,10 +4565,10 @@ func IsValidSysctlName(name string) bool {
return sysctlContainSlashRegexp.MatchString(name) return sysctlContainSlashRegexp.MatchString(name)
} }
func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path) field.ErrorList { func validateSysctls(securityContext *core.PodSecurityContext, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
allErrs := field.ErrorList{} allErrs := field.ErrorList{}
names := make(map[string]struct{}) names := make(map[string]struct{})
for i, s := range sysctls { for i, s := range securityContext.Sysctls {
if len(s.Name) == 0 { if len(s.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), "")) allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
} else if !IsValidSysctlName(s.Name) { } else if !IsValidSysctlName(s.Name) {
@ -4573,11 +4576,29 @@ func validateSysctls(sysctls []core.Sysctl, fldPath *field.Path) field.ErrorList
} else if _, ok := names[s.Name]; ok { } else if _, ok := names[s.Name]; ok {
allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name)) allErrs = append(allErrs, field.Duplicate(fldPath.Index(i).Child("name"), s.Name))
} }
if !opts.AllowNamespacedSysctlsForHostNetAndHostIPC {
err := ValidateHostSysctl(s.Name, securityContext, fldPath.Index(i).Child("name"))
if err != nil {
allErrs = append(allErrs, err)
}
}
names[s.Name] = struct{}{} names[s.Name] = struct{}{}
} }
return allErrs return allErrs
} }
// ValidateHostSysctl rejects a namespaced sysctl when the pod shares the
// corresponding namespace with the host. It returns a field.Invalid error at
// fldPath when a Net-namespaced sysctl is combined with HostNetwork, or when an
// IPC-namespaced sysctl is combined with HostIPC; otherwise it returns nil.
func ValidateHostSysctl(sysctl string, securityContext *core.PodSecurityContext, fldPath *field.Path) *field.Error {
	// Only the namespace is needed here; the other two results of
	// GetNamespace are intentionally discarded.
	namespace, _, _ := utilsysctl.GetNamespace(sysctl)

	if securityContext.HostNetwork && namespace == utilsysctl.NetNamespace {
		return field.Invalid(fldPath, sysctl, "may not be specified when 'hostNetwork' is true")
	}
	if securityContext.HostIPC && namespace == utilsysctl.IPCNamespace {
		return field.Invalid(fldPath, sysctl, "may not be specified when 'hostIPC' is true")
	}
	return nil
}
// validatePodSpecSecurityContext verifies the SecurityContext of a PodSpec, // validatePodSpecSecurityContext verifies the SecurityContext of a PodSpec,
// whether that is defined in a Pod or in an embedded PodSpec (e.g. a // whether that is defined in a Pod or in an embedded PodSpec (e.g. a
// Deployment's pod template). // Deployment's pod template).
@ -4610,7 +4631,7 @@ func validatePodSpecSecurityContext(securityContext *core.PodSecurityContext, sp
} }
if len(securityContext.Sysctls) != 0 { if len(securityContext.Sysctls) != 0 {
allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"))...) allErrs = append(allErrs, validateSysctls(securityContext, fldPath.Child("sysctls"), opts)...)
} }
if securityContext.FSGroupChangePolicy != nil { if securityContext.FSGroupChangePolicy != nil {

View File

@ -21491,16 +21491,32 @@ func TestValidateSysctls(t *testing.T) {
"_invalid", "_invalid",
} }
invalidWithHostNet := []string{
"net.ipv4.conf.enp3s0/200.forwarding",
"net/ipv4/conf/enp3s0.200/forwarding",
}
invalidWithHostIPC := []string{
"kernel.shmmax",
"kernel.msgmax",
}
duplicates := []string{ duplicates := []string{
"kernel.shmmax", "kernel.shmmax",
"kernel.shmmax", "kernel.shmmax",
} }
opts := PodValidationOptions{
AllowNamespacedSysctlsForHostNetAndHostIPC: false,
}
sysctls := make([]core.Sysctl, len(valid)) sysctls := make([]core.Sysctl, len(valid))
validSecurityContext := &core.PodSecurityContext{
Sysctls: sysctls,
}
for i, sysctl := range valid { for i, sysctl := range valid {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs := validateSysctls(sysctls, field.NewPath("foo")) errs := validateSysctls(validSecurityContext, field.NewPath("foo"), opts)
if len(errs) != 0 { if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs) t.Errorf("unexpected validation errors: %v", errs)
} }
@ -21509,7 +21525,10 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range invalid { for i, sysctl := range invalid {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs = validateSysctls(sysctls, field.NewPath("foo")) inValidSecurityContext := &core.PodSecurityContext{
Sysctls: sysctls,
}
errs = validateSysctls(inValidSecurityContext, field.NewPath("foo"), opts)
if len(errs) != 2 { if len(errs) != 2 {
t.Errorf("expected 2 validation errors. Got: %v", errs) t.Errorf("expected 2 validation errors. Got: %v", errs)
} else { } else {
@ -21525,12 +21544,54 @@ func TestValidateSysctls(t *testing.T) {
for i, sysctl := range duplicates { for i, sysctl := range duplicates {
sysctls[i].Name = sysctl sysctls[i].Name = sysctl
} }
errs = validateSysctls(sysctls, field.NewPath("foo")) securityContextWithDup := &core.PodSecurityContext{
Sysctls: sysctls,
}
errs = validateSysctls(securityContextWithDup, field.NewPath("foo"), opts)
if len(errs) != 1 { if len(errs) != 1 {
t.Errorf("unexpected validation errors: %v", errs) t.Errorf("unexpected validation errors: %v", errs)
} else if errs[0].Type != field.ErrorTypeDuplicate { } else if errs[0].Type != field.ErrorTypeDuplicate {
t.Errorf("expected error type %v, got %v", field.ErrorTypeDuplicate, errs[0].Type) t.Errorf("expected error type %v, got %v", field.ErrorTypeDuplicate, errs[0].Type)
} }
sysctls = make([]core.Sysctl, len(invalidWithHostNet))
for i, sysctl := range invalidWithHostNet {
sysctls[i].Name = sysctl
}
invalidSecurityContextWithHostNet := &core.PodSecurityContext{
Sysctls: sysctls,
HostIPC: false,
HostNetwork: true,
}
errs = validateSysctls(invalidSecurityContextWithHostNet, field.NewPath("foo"), opts)
if len(errs) != 2 {
t.Errorf("unexpected validation errors: %v", errs)
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
errs = validateSysctls(invalidSecurityContextWithHostNet, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
sysctls = make([]core.Sysctl, len(invalidWithHostIPC))
for i, sysctl := range invalidWithHostIPC {
sysctls[i].Name = sysctl
}
invalidSecurityContextWithHostIPC := &core.PodSecurityContext{
Sysctls: sysctls,
HostIPC: true,
HostNetwork: false,
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = false
errs = validateSysctls(invalidSecurityContextWithHostIPC, field.NewPath("foo"), opts)
if len(errs) != 2 {
t.Errorf("unexpected validation errors: %v", errs)
}
opts.AllowNamespacedSysctlsForHostNetAndHostIPC = true
errs = validateSysctls(invalidSecurityContextWithHostIPC, field.NewPath("foo"), opts)
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
} }
func newNodeNameEndpoint(nodeName string) *core.Endpoints { func newNodeNameEndpoint(nodeName string) *core.Endpoints {

View File

@ -20,6 +20,7 @@ import (
"fmt" "fmt"
"strings" "strings"
utilsysctl "k8s.io/component-helpers/node/util/sysctl"
"k8s.io/kubernetes/pkg/apis/core/validation" "k8s.io/kubernetes/pkg/apis/core/validation"
policyvalidation "k8s.io/kubernetes/pkg/apis/policy/validation" policyvalidation "k8s.io/kubernetes/pkg/apis/policy/validation"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
@ -33,8 +34,8 @@ const (
// checks validity via a sysctl and prefix map, rejecting those which are not known // checks validity via a sysctl and prefix map, rejecting those which are not known
// to be namespaced. // to be namespaced.
type patternAllowlist struct { type patternAllowlist struct {
sysctls map[string]Namespace sysctls map[string]utilsysctl.Namespace
prefixes map[string]Namespace prefixes map[string]utilsysctl.Namespace
} }
var _ lifecycle.PodAdmitHandler = &patternAllowlist{} var _ lifecycle.PodAdmitHandler = &patternAllowlist{}
@ -42,8 +43,8 @@ var _ lifecycle.PodAdmitHandler = &patternAllowlist{}
// NewAllowlist creates a new Allowlist from a list of sysctls and sysctl pattern (ending in *). // NewAllowlist creates a new Allowlist from a list of sysctls and sysctl pattern (ending in *).
func NewAllowlist(patterns []string) (*patternAllowlist, error) { func NewAllowlist(patterns []string) (*patternAllowlist, error) {
w := &patternAllowlist{ w := &patternAllowlist{
sysctls: map[string]Namespace{}, sysctls: map[string]utilsysctl.Namespace{},
prefixes: map[string]Namespace{}, prefixes: map[string]utilsysctl.Namespace{},
} }
for _, s := range patterns { for _, s := range patterns {
@ -54,20 +55,14 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
policyvalidation.SysctlContainSlashPatternFmt, policyvalidation.SysctlContainSlashPatternFmt,
) )
} }
s = convertSysctlVariableToDotsSeparator(s) ns, sysctlOrPrefix, prefixed := utilsysctl.GetNamespace(s)
if strings.HasSuffix(s, "*") { if ns == utilsysctl.UnknownNamespace {
prefix := s[:len(s)-1] return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", sysctlOrPrefix)
ns := NamespacedBy(prefix) }
if ns == unknownNamespace { if prefixed {
return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s) w.prefixes[sysctlOrPrefix] = ns
}
w.prefixes[prefix] = ns
} else { } else {
ns := NamespacedBy(s) w.sysctls[sysctlOrPrefix] = ns
if ns == unknownNamespace {
return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
}
w.sysctls[s] = ns
} }
} }
return w, nil return w, nil
@ -81,23 +76,23 @@ func NewAllowlist(patterns []string) (*patternAllowlist, error) {
// respective namespaces with the host. This check is only possible for sysctls on // respective namespaces with the host. This check is only possible for sysctls on
// the static default allowlist, not those on the custom allowlist provided by the admin. // the static default allowlist, not those on the custom allowlist provided by the admin.
func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool) error { func (w *patternAllowlist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
sysctl = convertSysctlVariableToDotsSeparator(sysctl) sysctl = utilsysctl.NormalizeName(sysctl)
nsErrorFmt := "%q not allowed with host %s enabled" nsErrorFmt := "%q not allowed with host %s enabled"
if ns, found := w.sysctls[sysctl]; found { if ns, found := w.sysctls[sysctl]; found {
if ns == ipcNamespace && hostIPC { if ns == utilsysctl.IPCNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns) return fmt.Errorf(nsErrorFmt, sysctl, ns)
} }
if ns == netNamespace && hostNet { if ns == utilsysctl.NetNamespace && hostNet {
return fmt.Errorf(nsErrorFmt, sysctl, ns) return fmt.Errorf(nsErrorFmt, sysctl, ns)
} }
return nil return nil
} }
for p, ns := range w.prefixes { for p, ns := range w.prefixes {
if strings.HasPrefix(sysctl, p) { if strings.HasPrefix(sysctl, p) {
if ns == ipcNamespace && hostIPC { if ns == utilsysctl.IPCNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns) return fmt.Errorf(nsErrorFmt, sysctl, ns)
} }
if ns == netNamespace && hostNet { if ns == utilsysctl.NetNamespace && hostNet {
return fmt.Errorf(nsErrorFmt, sysctl, ns) return fmt.Errorf(nsErrorFmt, sysctl, ns)
} }
return nil return nil

View File

@ -1,3 +1,6 @@
//go:build linux
// +build linux
/* /*
Copyright 2016 The Kubernetes Authors. Copyright 2016 The Kubernetes Authors.
@ -17,9 +20,10 @@ limitations under the License.
package sysctl package sysctl
import ( import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"testing" "testing"
v1 "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
) )
func TestNewAllowlist(t *testing.T) { func TestNewAllowlist(t *testing.T) {

View File

@ -1,60 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"strings"
)
// Namespace represents a kernel namespace name.
type Namespace string

// Kernel namespaces a sysctl can be attributed to.
const (
	// ipcNamespace is the Linux IPC namespace.
	ipcNamespace Namespace = "ipc"
	// netNamespace is the network namespace.
	netNamespace Namespace = "net"
	// unknownNamespace is the zero value, used when no namespace is known.
	unknownNamespace Namespace = ""
)

// namespaces maps exact sysctl names to the namespace they belong to.
var namespaces = map[string]Namespace{
	"kernel.sem": ipcNamespace,
}

// prefixNamespaces maps sysctl name prefixes to the namespace they belong to.
var prefixNamespaces = map[string]Namespace{
	"kernel.shm": ipcNamespace,
	"kernel.msg": ipcNamespace,
	"fs.mqueue.": ipcNamespace,
	"net.":       netNamespace,
}

// NamespacedBy returns the namespace of the Linux kernel for a sysctl, or
// unknownNamespace if the sysctl is not known to be namespaced. An exact match
// in namespaces is tried first; after that any matching entry of
// prefixNamespaces decides.
func NamespacedBy(val string) Namespace {
	if exact, ok := namespaces[val]; ok {
		return exact
	}
	for prefix, namespace := range prefixNamespaces {
		if !strings.HasPrefix(val, prefix) {
			continue
		}
		return namespace
	}
	return unknownNamespace
}

View File

@ -17,39 +17,10 @@ limitations under the License.
package sysctl package sysctl
import ( import (
"strings" v1 "k8s.io/api/core/v1"
utilsysctl "k8s.io/component-helpers/node/util/sysctl"
"k8s.io/api/core/v1"
) )
// convertSysctlVariableToDotsSeparator returns the sysctl variable in
// dot-separated form. A name may use '/' in place of '.' as its separator; in
// that case every '/' and '.' in the name is swapped so that validation always
// sees the dotted form. Names whose first separator is already '.', and names
// without any separator (including the empty string), are returned unchanged.
// More info:
//
//	https://man7.org/linux/man-pages/man8/sysctl.8.html
//	https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func convertSysctlVariableToDotsSeparator(val string) string {
	// The first separator decides which convention the name follows.
	sep := strings.IndexAny(val, "./")
	if sep < 0 || val[sep] == '.' {
		return val
	}

	// Slash convention: exchange the roles of '.' and '/'.
	swap := func(r rune) rune {
		if r == '.' {
			return '/'
		}
		if r == '/' {
			return '.'
		}
		return r
	}
	return strings.Map(swap, val)
}
// ConvertPodSysctlsVariableToDotsSeparator converts sysctls variable in the Pod.Spec.SecurityContext.Sysctls slice into a dot as a separator // ConvertPodSysctlsVariableToDotsSeparator converts sysctls variable in the Pod.Spec.SecurityContext.Sysctls slice into a dot as a separator
// according to the linux sysctl conversion rules. // according to the linux sysctl conversion rules.
// see https://man7.org/linux/man-pages/man5/sysctl.d.5.html for more details. // see https://man7.org/linux/man-pages/man5/sysctl.d.5.html for more details.
@ -58,7 +29,7 @@ func ConvertPodSysctlsVariableToDotsSeparator(securityContext *v1.PodSecurityCon
return return
} }
for i, sysctl := range securityContext.Sysctls { for i, sysctl := range securityContext.Sysctls {
securityContext.Sysctls[i].Name = convertSysctlVariableToDotsSeparator(sysctl.Name) securityContext.Sysctls[i].Name = utilsysctl.NormalizeName(sysctl.Name)
} }
return return
} }

View File

@ -24,29 +24,6 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
// TestConvertSysctlVariableToDotsSeparator checks that
// convertSysctlVariableToDotsSeparator turns slash-separated sysctl names into
// their dot-separated form and leaves dot-separated names untouched.
func TestConvertSysctlVariableToDotsSeparator(t *testing.T) {
	cases := []struct {
		in  string
		out string
	}{
		{in: "kernel.shm_rmid_forced", out: "kernel.shm_rmid_forced"},
		{in: "kernel/shm_rmid_forced", out: "kernel.shm_rmid_forced"},
		{in: "net.ipv4.conf.eno2/100.rp_filter", out: "net.ipv4.conf.eno2/100.rp_filter"},
		{in: "net/ipv4/conf/eno2.100/rp_filter", out: "net.ipv4.conf.eno2/100.rp_filter"},
		{in: "net/ipv4/ip_local_port_range", out: "net.ipv4.ip_local_port_range"},
		{in: "kernel/msgmax", out: "kernel.msgmax"},
		{in: "kernel/sem", out: "kernel.sem"},
	}

	for i := range cases {
		got := convertSysctlVariableToDotsSeparator(cases[i].in)
		assert.Equalf(t, cases[i].out, got, "The sysctl variable was not converted correctly. got: %s, want: %s", got, cases[i].out)
	}
}
// TestConvertPodSysctlsVariableToDotsSeparator tests whether the sysctls variable // TestConvertPodSysctlsVariableToDotsSeparator tests whether the sysctls variable
// can be correctly converted to a dot as a separator. // can be correctly converted to a dot as a separator.
func TestConvertPodSysctlsVariableToDotsSeparator(t *testing.T) { func TestConvertPodSysctlsVariableToDotsSeparator(t *testing.T) {

View File

@ -6,6 +6,7 @@ go 1.21.3
require ( require (
github.com/google/go-cmp v0.6.0 github.com/google/go-cmp v0.6.0
github.com/stretchr/testify v1.8.4
k8s.io/api v0.0.0 k8s.io/api v0.0.0
k8s.io/apimachinery v0.0.0 k8s.io/apimachinery v0.0.0
k8s.io/client-go v0.0.0 k8s.io/client-go v0.0.0
@ -33,6 +34,7 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/net v0.17.0 // indirect golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/sys v0.13.0 // indirect golang.org/x/sys v0.13.0 // indirect

View File

@ -0,0 +1,104 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"strings"
)
// Namespace represents a kernel namespace name.
type Namespace string

// Kernel namespaces a sysctl can be attributed to.
const (
	// IPCNamespace is the Linux IPC namespace, see
	// https://man7.org/linux/man-pages/man7/ipc_namespaces.7.html
	IPCNamespace Namespace = "IPC"
	// NetNamespace is the network namespace, see
	// https://man7.org/linux/man-pages/man7/network_namespaces.7.html
	NetNamespace Namespace = "Net"
	// UnknownNamespace is the zero value, used when no namespace is known.
	UnknownNamespace Namespace = ""
)

// nameToNamespace maps exact sysctl names to their namespace.
var nameToNamespace = map[string]Namespace{
	// Kernel semaphore parameters: SEMMSL, SEMMNS, SEMOPM, and SEMMNI.
	"kernel.sem": IPCNamespace,

	// Kernel shared memory limits: shmall, shmmax, shmmni, and shm_rmid_forced.
	"kernel.shmall":          IPCNamespace,
	"kernel.shmmax":          IPCNamespace,
	"kernel.shmmni":          IPCNamespace,
	"kernel.shm_rmid_forced": IPCNamespace,
	// Bare stem, kept for backward compatibility so that the namespace of
	// kernel.shm* is still known.
	"kernel.shm": IPCNamespace,

	// Kernel message limits: msgmni, msgmax and msgmnb.
	"kernel.msgmax": IPCNamespace,
	"kernel.msgmnb": IPCNamespace,
	"kernel.msgmni": IPCNamespace,
	// Bare stem, kept for backward compatibility so that the namespace of
	// kernel.msg* is still known.
	"kernel.msg": IPCNamespace,
}

// prefixToNamespace maps sysctl name prefixes (written without the trailing
// '.') to their namespace.
var prefixToNamespace = map[string]Namespace{
	"net": NetNamespace,
	// The mqueue filesystem provides the kernel features needed by a user
	// space library implementing the POSIX message queues API.
	"fs.mqueue": IPCNamespace,
}

// namespaceOf returns the namespace of the Linux kernel for a sysctl, or
// UnknownNamespace if the sysctl is not known to be namespaced. An exact match
// in nameToNamespace is tried first; otherwise val must start with one of the
// prefixToNamespace keys followed by a '.'.
func namespaceOf(val string) Namespace {
	if exact, ok := nameToNamespace[val]; ok {
		return exact
	}
	for prefix, namespace := range prefixToNamespace {
		if !strings.HasPrefix(val, prefix+".") {
			continue
		}
		return namespace
	}
	return UnknownNamespace
}
// GetNamespace extracts information from a sysctl string. It returns:
//  1. ns: the namespace reported by namespaceOf for sysctlOrPrefix, i.e. IPC,
//     Net, or unknown.
//  2. sysctlOrPrefix: the name as returned by NormalizeName, cut off just
//     before the first '*'. If there is no '*', it is the whole normalized name.
//  3. prefixed: true if and only if the normalized name contains a '*'
//     (in most cases a trailing '*').
//
// For example, if the input sysctl is 'net.ipv6.neigh.*', GetNamespace returns:
//   - the Net namespace
//   - sysctlOrPrefix 'net.ipv6.neigh.' (everything before the '*', so the
//     trailing dot is kept)
//   - prefixed set to true
//
// For the input sysctl 'net.ipv6.conf.all.disable_ipv6', GetNamespace returns:
//   - the Net namespace
//   - sysctlOrPrefix 'net.ipv6.conf.all.disable_ipv6'
//   - prefixed set to false
func GetNamespace(sysctl string) (ns Namespace, sysctlOrPrefix string, prefixed bool) {
	// Cut yields the text before the first '*' and whether a '*' was present;
	// without a '*' it hands back the whole normalized name and false.
	sysctlOrPrefix, _, prefixed = strings.Cut(NormalizeName(sysctl), "*")
	ns = namespaceOf(sysctlOrPrefix)
	return ns, sysctlOrPrefix, prefixed
}

View File

@ -1,5 +1,5 @@
/* /*
Copyright 2016 The Kubernetes Authors. Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
@ -20,16 +20,16 @@ import (
"testing" "testing"
) )
func TestNamespacedBy(t *testing.T) { func TestNamespacedOf(t *testing.T) {
tests := map[string]Namespace{ tests := map[string]Namespace{
"kernel.shm_rmid_forced": ipcNamespace, "kernel.shm_rmid_forced": IPCNamespace,
"net.a.b.c": netNamespace, "net.a.b.c": NetNamespace,
"fs.mqueue.a.b.c": ipcNamespace, "fs.mqueue.a.b.c": IPCNamespace,
"foo": unknownNamespace, "foo": UnknownNamespace,
} }
for sysctl, ns := range tests { for sysctl, ns := range tests {
if got := NamespacedBy(sysctl); got != ns { if got := namespaceOf(sysctl); got != ns {
t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns) t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns)
} }
} }

View File

@ -98,3 +98,34 @@ func (*procSysctl) GetSysctl(sysctl string) (int, error) {
func (*procSysctl) SetSysctl(sysctl string, newVal int) error { func (*procSysctl) SetSysctl(sysctl string, newVal int) error {
return os.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640) return os.WriteFile(path.Join(sysctlBase, sysctl), []byte(strconv.Itoa(newVal)), 0640)
} }
// NormalizeName returns the sysctl variable in dot-separated form.
// A name may use '/' in place of '.' as its separator; which convention a name
// follows is decided by the first separator found in it. Names that already
// start with a '.' separator, and names without any separator (including the
// empty string), are returned unchanged.
// More info:
//
//	https://man7.org/linux/man-pages/man8/sysctl.8.html
//	https://man7.org/linux/man-pages/man5/sysctl.d.5.html
func NormalizeName(val string) string {
	switch sep := strings.IndexAny(val, "./"); {
	case sep < 0:
		// No separator at all; nothing to normalize.
		return val
	case val[sep] == '.':
		// Dot convention already, e.g. `net.ipv4.conf.eno2/100.rp_filter`.
		return val
	}

	// Slash convention, e.g. `net/ipv4/conf/eno2.100/rp_filter`: swap every
	// '.' and '/' to obtain `net.ipv4.conf.eno2/100.rp_filter`.
	return strings.Map(func(r rune) rune {
		if r == '.' {
			return '/'
		}
		if r == '/' {
			return '.'
		}
		return r
	}, val)
}

View File

@ -0,0 +1,46 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestConvertSysctlVariableToDotsSeparator checks that NormalizeName turns
// slash-separated sysctl names into their dot-separated form and leaves
// dot-separated names untouched.
func TestConvertSysctlVariableToDotsSeparator(t *testing.T) {
	cases := []struct {
		in  string
		out string
	}{
		{in: "kernel.shm_rmid_forced", out: "kernel.shm_rmid_forced"},
		{in: "kernel/shm_rmid_forced", out: "kernel.shm_rmid_forced"},
		{in: "net.ipv4.conf.eno2/100.rp_filter", out: "net.ipv4.conf.eno2/100.rp_filter"},
		{in: "net/ipv4/conf/eno2.100/rp_filter", out: "net.ipv4.conf.eno2/100.rp_filter"},
		{in: "net/ipv4/ip_local_port_range", out: "net.ipv4.ip_local_port_range"},
		{in: "kernel/msgmax", out: "kernel.msgmax"},
		{in: "kernel/sem", out: "kernel.sem"},
	}

	for i := range cases {
		got := NormalizeName(cases[i].in)
		assert.Equalf(t, cases[i].out, got, "The sysctl variable was not converted correctly. got: %s, want: %s", got, cases[i].out)
	}
}