diff --git a/api/api-rules/violation_exceptions.list b/api/api-rules/violation_exceptions.list index cf41c82d240..470618222c3 100644 --- a/api/api-rules/violation_exceptions.list +++ b/api/api-rules/violation_exceptions.list @@ -261,6 +261,7 @@ API rule violation: names_match,k8s.io/kube-proxy/config/v1alpha1,KubeProxyConfi API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,IPTablesDropBit API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,IPTablesMasqueradeBit API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,ResolverConfig +API rule violation: names_match,k8s.io/kubelet/config/v1beta1,UserNamespaces,IDsPerPod API rule violation: names_match,k8s.io/metrics/pkg/apis/custom_metrics/v1beta1,MetricValue,WindowSeconds API rule violation: names_match,k8s.io/metrics/pkg/apis/external_metrics/v1beta1,ExternalMetricValue,WindowSeconds API rule violation: streaming_list_type_proto_tags,k8s.io/apimachinery/pkg/apis/meta/v1beta1,PartialObjectMetadataList,Items diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index c401a2f23a1..d66ee142a28 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -1280,6 +1280,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "k8s.io/kubelet/config/v1beta1.MemorySwapConfiguration": schema_k8sio_kubelet_config_v1beta1_MemorySwapConfiguration(ref), "k8s.io/kubelet/config/v1beta1.SerializedNodeConfigSource": schema_k8sio_kubelet_config_v1beta1_SerializedNodeConfigSource(ref), "k8s.io/kubelet/config/v1beta1.ShutdownGracePeriodByPodPriority": schema_k8sio_kubelet_config_v1beta1_ShutdownGracePeriodByPodPriority(ref), + "k8s.io/kubelet/config/v1beta1.UserNamespaces": schema_k8sio_kubelet_config_v1beta1_UserNamespaces(ref), "k8s.io/kubernetes/pkg/apis/abac/v1beta1.Policy": 
schema_pkg_apis_abac_v1beta1_Policy(ref), "k8s.io/kubernetes/pkg/apis/abac/v1beta1.PolicySpec": schema_pkg_apis_abac_v1beta1_PolicySpec(ref), "k8s.io/metrics/pkg/apis/custom_metrics/v1beta1.MetricListOptions": schema_pkg_apis_custom_metrics_v1beta1_MetricListOptions(ref), @@ -66201,12 +66202,18 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen Format: "", }, }, + "userNamespaces": { + SchemaProps: spec.SchemaProps{ + Description: "UserNamespaces contains User Namespace configurations.", + Ref: ref("k8s.io/kubelet/config/v1beta1.UserNamespaces"), + }, + }, }, Required: []string{"containerRuntimeEndpoint"}, }, }, Dependencies: []string{ - "k8s.io/api/core/v1.Taint", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/component-base/logs/api/v1.LoggingConfiguration", "k8s.io/component-base/tracing/api/v1.TracingConfiguration", "k8s.io/kubelet/config/v1beta1.CrashLoopBackOffConfig", "k8s.io/kubelet/config/v1beta1.KubeletAuthentication", "k8s.io/kubelet/config/v1beta1.KubeletAuthorization", "k8s.io/kubelet/config/v1beta1.MemoryReservation", "k8s.io/kubelet/config/v1beta1.MemorySwapConfiguration", "k8s.io/kubelet/config/v1beta1.ShutdownGracePeriodByPodPriority"}, + "k8s.io/api/core/v1.Taint", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/component-base/logs/api/v1.LoggingConfiguration", "k8s.io/component-base/tracing/api/v1.TracingConfiguration", "k8s.io/kubelet/config/v1beta1.CrashLoopBackOffConfig", "k8s.io/kubelet/config/v1beta1.KubeletAuthentication", "k8s.io/kubelet/config/v1beta1.KubeletAuthorization", "k8s.io/kubelet/config/v1beta1.MemoryReservation", "k8s.io/kubelet/config/v1beta1.MemorySwapConfiguration", "k8s.io/kubelet/config/v1beta1.ShutdownGracePeriodByPodPriority", "k8s.io/kubelet/config/v1beta1.UserNamespaces"}, } } @@ -66403,6 +66410,26 @@ func schema_k8sio_kubelet_config_v1beta1_ShutdownGracePeriodByPodPriority(ref co } } +func schema_k8sio_kubelet_config_v1beta1_UserNamespaces(ref 
common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "UserNamespaces contains User Namespace configurations.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "idsPerPod": { + SchemaProps: spec.SchemaProps{ + Description: "IDsPerPod is the mapping length of UIDs and GIDs. The length must be a multiple of 65536, and must be less than 1<<32. On non-linux such as windows, only null / absent is allowed.\n\nChanging the value may require recreating all containers on the node.\n\nDefault: 65536", + Type: []string{"integer"}, + Format: "int64", + }, + }, + }, + }, + }, + } +} + func schema_pkg_apis_abac_v1beta1_Policy(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/pkg/kubelet/apis/config/helpers_test.go b/pkg/kubelet/apis/config/helpers_test.go index b170def02dc..48ea219d273 100644 --- a/pkg/kubelet/apis/config/helpers_test.go +++ b/pkg/kubelet/apis/config/helpers_test.go @@ -306,5 +306,6 @@ var ( "LocalStorageCapacityIsolation", "FailCgroupV1", "CrashLoopBackOff.MaxContainerRestartPeriod", + "UserNamespaces.IDsPerPod", ) ) diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index d295f16122c..1cc258880a1 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -541,6 +541,11 @@ type KubeletConfiguration struct { // +featureGate=KubeletCrashLoopBackoffMax // +optional CrashLoopBackOff CrashLoopBackOffConfig + + // UserNamespaces contains User Namespace configurations. + // +featureGate=UserNamespacesSupport + // +optional + UserNamespaces *UserNamespaces } // KubeletAuthorizationMode denotes the authorization mode for the kubelet @@ -878,3 +883,17 @@ type ImagePullSecret struct { // content of the secret specified by the UID/Namespace/Name coordinates. 
CredentialHash string } + +// UserNamespaces contains User Namespace configurations. +type UserNamespaces struct { + // IDsPerPod is the mapping length of UIDs and GIDs. + // The length must be a multiple of 65536, and must be less than 1<<32. + // On non-linux such as windows, only null / absent is allowed. + // + // Changing the value may require recreating all containers on the node. + // + // Default: 65536 + // +featureGate=UserNamespacesSupport + // +optional + IDsPerPod *int64 +} diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go index 377fc235395..917d40a5e84 100644 --- a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go @@ -185,6 +185,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*configv1beta1.UserNamespaces)(nil), (*config.UserNamespaces)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_UserNamespaces_To_config_UserNamespaces(a.(*configv1beta1.UserNamespaces), b.(*config.UserNamespaces), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*config.UserNamespaces)(nil), (*configv1beta1.UserNamespaces)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_config_UserNamespaces_To_v1beta1_UserNamespaces(a.(*config.UserNamespaces), b.(*configv1beta1.UserNamespaces), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*config.CredentialProvider)(nil), (*configv1beta1.CredentialProvider)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_config_CredentialProvider_To_v1beta1_CredentialProvider(a.(*config.CredentialProvider), b.(*configv1beta1.CredentialProvider), scope) }); err != nil { @@ -584,6 +594,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in if 
err := v1.Convert_Pointer_bool_To_bool(&in.FailCgroupV1, &out.FailCgroupV1, s); err != nil { return err } + out.UserNamespaces = (*config.UserNamespaces)(unsafe.Pointer(in.UserNamespaces)) return nil } @@ -789,6 +800,7 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in if err := Convert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig(&in.CrashLoopBackOff, &out.CrashLoopBackOff, s); err != nil { return err } + out.UserNamespaces = (*configv1beta1.UserNamespaces)(unsafe.Pointer(in.UserNamespaces)) return nil } @@ -948,3 +960,23 @@ func autoConvert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGrac func Convert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority(in *config.ShutdownGracePeriodByPodPriority, out *configv1beta1.ShutdownGracePeriodByPodPriority, s conversion.Scope) error { return autoConvert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority(in, out, s) } + +func autoConvert_v1beta1_UserNamespaces_To_config_UserNamespaces(in *configv1beta1.UserNamespaces, out *config.UserNamespaces, s conversion.Scope) error { + out.IDsPerPod = (*int64)(unsafe.Pointer(in.IDsPerPod)) + return nil +} + +// Convert_v1beta1_UserNamespaces_To_config_UserNamespaces is an autogenerated conversion function. +func Convert_v1beta1_UserNamespaces_To_config_UserNamespaces(in *configv1beta1.UserNamespaces, out *config.UserNamespaces, s conversion.Scope) error { + return autoConvert_v1beta1_UserNamespaces_To_config_UserNamespaces(in, out, s) +} + +func autoConvert_config_UserNamespaces_To_v1beta1_UserNamespaces(in *config.UserNamespaces, out *configv1beta1.UserNamespaces, s conversion.Scope) error { + out.IDsPerPod = (*int64)(unsafe.Pointer(in.IDsPerPod)) + return nil +} + +// Convert_config_UserNamespaces_To_v1beta1_UserNamespaces is an autogenerated conversion function. 
+func Convert_config_UserNamespaces_To_v1beta1_UserNamespaces(in *config.UserNamespaces, out *configv1beta1.UserNamespaces, s conversion.Scope) error { + return autoConvert_config_UserNamespaces_To_v1beta1_UserNamespaces(in, out, s) +} diff --git a/pkg/kubelet/apis/config/validation/validation_linux.go b/pkg/kubelet/apis/config/validation/validation_linux.go index 5216538292c..dca9c69b5a3 100644 --- a/pkg/kubelet/apis/config/validation/validation_linux.go +++ b/pkg/kubelet/apis/config/validation/validation_linux.go @@ -21,12 +21,15 @@ package validation import ( "fmt" + "math" libcontainercgroups "github.com/opencontainers/cgroups" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/utils/ptr" ) +const userNsUnitLength = 65536 + // validateKubeletOSConfiguration validates os specific kubelet configuration and returns an error if it is invalid. func validateKubeletOSConfiguration(kc *kubeletconfig.KubeletConfiguration) error { isCgroup1 := !libcontainercgroups.IsCgroup2UnifiedMode() @@ -38,5 +41,20 @@ func validateKubeletOSConfiguration(kc *kubeletconfig.KubeletConfiguration) erro return fmt.Errorf("invalid configuration: singleProcessOOMKill must not be explicitly set to false when using cgroup v1") } + if userNs := kc.UserNamespaces; userNs != nil { + if idsPerPod := userNs.IDsPerPod; idsPerPod != nil { + if *idsPerPod < userNsUnitLength { + return fmt.Errorf("invalid configuration: userNamespaces.idsPerPod must not be less than %d", userNsUnitLength) + } + if *idsPerPod%userNsUnitLength != 0 { + return fmt.Errorf("invalid configuration: userNamespaces.idsPerPod must be a multiple of %d", userNsUnitLength) + } + if *idsPerPod > math.MaxUint32 { + // int64() is needed for 32-bit targets + return fmt.Errorf("invalid configuration: userNamespaces.idsPerPod must not be more than %d", int64(math.MaxUint32)) + } + } + } + return nil } diff --git a/pkg/kubelet/apis/config/validation/validation_others.go 
b/pkg/kubelet/apis/config/validation/validation_others.go index e019421398e..34adffb8443 100644 --- a/pkg/kubelet/apis/config/validation/validation_others.go +++ b/pkg/kubelet/apis/config/validation/validation_others.go @@ -31,5 +31,9 @@ func validateKubeletOSConfiguration(kc *kubeletconfig.KubeletConfiguration) erro return fmt.Errorf("invalid configuration: singleProcessOOMKill is only supported on linux") } + if kc.UserNamespaces != nil { + return fmt.Errorf("invalid configuration: userNamespaces is only supported on linux") + } + return nil } diff --git a/pkg/kubelet/apis/config/validation/validation_windows.go b/pkg/kubelet/apis/config/validation/validation_windows.go index 65765fe0db5..510d315da0d 100644 --- a/pkg/kubelet/apis/config/validation/validation_windows.go +++ b/pkg/kubelet/apis/config/validation/validation_windows.go @@ -46,5 +46,9 @@ func validateKubeletOSConfiguration(kc *kubeletconfig.KubeletConfiguration) erro klog.Warningf(message, "EnforceNodeAllocatable", "--enforce-node-allocatable", kc.EnforceNodeAllocatable) } + if kc.UserNamespaces != nil { + return fmt.Errorf("invalid configuration: userNamespaces is not supported on Windows") + } + return nil } diff --git a/pkg/kubelet/apis/config/zz_generated.deepcopy.go b/pkg/kubelet/apis/config/zz_generated.deepcopy.go index fd7828cd14a..dc1baba0fbb 100644 --- a/pkg/kubelet/apis/config/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/config/zz_generated.deepcopy.go @@ -459,6 +459,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { (*in).DeepCopyInto(*out) } in.CrashLoopBackOff.DeepCopyInto(&out.CrashLoopBackOff) + if in.UserNamespaces != nil { + in, out := &in.UserNamespaces, &out.UserNamespaces + *out = new(UserNamespaces) + (*in).DeepCopyInto(*out) + } return } @@ -642,3 +647,24 @@ func (in *ShutdownGracePeriodByPodPriority) DeepCopy() *ShutdownGracePeriodByPod in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the 
receiver, writing into out. in must be non-nil. +func (in *UserNamespaces) DeepCopyInto(out *UserNamespaces) { + *out = *in + if in.IDsPerPod != nil { + in, out := &in.IDsPerPod, &out.IDsPerPod + *out = new(int64) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UserNamespaces. +func (in *UserNamespaces) DeepCopy() *UserNamespaces { + if in == nil { + return nil + } + out := new(UserNamespaces) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/kubelet/config/defaults.go b/pkg/kubelet/config/defaults.go index effee19e6e9..b8788f9e431 100644 --- a/pkg/kubelet/config/defaults.go +++ b/pkg/kubelet/config/defaults.go @@ -30,4 +30,5 @@ const ( KubeletPluginsDirSELinuxLabel = "system_u:object_r:container_file_t:s0" KubeletContainersSharedSELinuxLabel = "system_u:object_r:container_file_t:s0" DefaultKubeletCheckpointsDirName = "checkpoints" + DefaultKubeletUserNamespacesIDsPerPod = 65536 ) diff --git a/pkg/kubelet/kubelet_getters.go b/pkg/kubelet/kubelet_getters.go index ec22cde4604..491ce594dd3 100644 --- a/pkg/kubelet/kubelet_getters.go +++ b/pkg/kubelet/kubelet_getters.go @@ -140,6 +140,20 @@ func (kl *Kubelet) GetMaxPods() int { return kl.maxPods } +func (kl *Kubelet) GetUserNamespacesIDsPerPod() uint32 { + userNs := kl.kubeletConfiguration.UserNamespaces + if userNs == nil { + return config.DefaultKubeletUserNamespacesIDsPerPod + } + idsPerPod := userNs.IDsPerPod + if idsPerPod == nil || *idsPerPod == 0 { + return config.DefaultKubeletUserNamespacesIDsPerPod + } + // The value is already validated to be <= MaxUint32, + // so we can safely drop the upper bits. + return uint32(*idsPerPod) +} + // getPodDir returns the full path to the per-pod directory for the pod with // the given UID. 
func (kl *Kubelet) getPodDir(podUID types.UID) string { diff --git a/pkg/kubelet/userns/types.go b/pkg/kubelet/userns/types.go index a0422d0042c..820c2aaadca 100644 --- a/pkg/kubelet/userns/types.go +++ b/pkg/kubelet/userns/types.go @@ -26,4 +26,5 @@ type userNsPodsManager interface { ListPodsFromDisk() ([]types.UID, error) GetKubeletMappings() (uint32, uint32, error) GetMaxPods() int + GetUserNamespacesIDsPerPod() uint32 } diff --git a/pkg/kubelet/userns/userns_manager.go b/pkg/kubelet/userns/userns_manager.go index 37528b94003..89a45e864bc 100644 --- a/pkg/kubelet/userns/userns_manager.go +++ b/pkg/kubelet/userns/userns_manager.go @@ -39,12 +39,14 @@ import ( utilfs "k8s.io/kubernetes/pkg/util/filesystem" ) -// length for the user namespace to create (65536). -const userNsLength = (1 << 16) +const ( + // Create a new map when we removed enough pods to avoid memory leaks + // since Go maps never free memory. + mapReInitializeThreshold = 1000 -// Create a new map when we removed enough pods to avoid memory leaks -// since Go maps never free memory. 
-const mapReInitializeThreshold = 1000 + // userNsUnitLength is the unit length of UserNS + userNsUnitLength = 65536 +) type UsernsManager struct { used *allocator.AllocationBitmap @@ -54,6 +56,8 @@ type UsernsManager struct { off int len int + userNsLength uint32 + kl userNsPodsManager // This protects all members except for kl.anager lock sync.Mutex @@ -130,6 +134,11 @@ func MakeUserNsManager(kl userNsPodsManager) (*UsernsManager, error) { return nil, fmt.Errorf("kubelet mappings: %w", err) } + userNsLength := kl.GetUserNamespacesIDsPerPod() + + if userNsLength%userNsUnitLength != 0 { + return nil, fmt.Errorf("kubelet user namespace length %v is not a multiple of %d", userNsLength, userNsUnitLength) + } if kubeletMappingID%userNsLength != 0 { return nil, fmt.Errorf("kubelet user assigned ID %v is not a multiple of %v", kubeletMappingID, userNsLength) } @@ -147,11 +156,12 @@ func MakeUserNsManager(kl userNsPodsManager) (*UsernsManager, error) { len := int(kubeletMappingLen / userNsLength) m := UsernsManager{ - used: allocator.NewAllocationMap(len, "user namespaces"), - usedBy: make(map[types.UID]uint32), - kl: kl, - off: off, - len: len, + used: allocator.NewAllocationMap(len, "user namespaces"), + usedBy: make(map[types.UID]uint32), + kl: kl, + off: off, + len: len, + userNsLength: userNsLength, } // do not bother reading the list of pods if user namespaces are not enabled. @@ -196,7 +206,7 @@ func (m *UsernsManager) recordPodMappings(pod types.UID) error { // isSet checks if the specified index is already set. 
func (m *UsernsManager) isSet(v uint32) bool { - index := int(v/userNsLength) - m.off + index := int(v/m.userNsLength) - m.off if index < 0 || index >= m.len { return true } @@ -217,24 +227,24 @@ func (m *UsernsManager) allocateOne(pod types.UID) (firstID uint32, length uint3 klog.V(5).InfoS("new pod user namespace allocation", "podUID", pod) - firstID = uint32((firstZero + m.off) * userNsLength) + firstID = uint32((firstZero + m.off)) * m.userNsLength m.usedBy[pod] = firstID - return firstID, userNsLength, nil + return firstID, m.userNsLength, nil } // record stores the user namespace [from; from+length] to the specified pod. func (m *UsernsManager) record(pod types.UID, from, length uint32) (err error) { - if length != userNsLength { + if length != m.userNsLength { return fmt.Errorf("wrong user namespace length %v", length) } - if from%userNsLength != 0 { + if from%m.userNsLength != 0 { return fmt.Errorf("wrong user namespace offset specified %v", from) } prevFrom, found := m.usedBy[pod] if found && prevFrom != from { return fmt.Errorf("different user namespace range already used by pod %q", pod) } - index := int(from/userNsLength) - m.off + index := int(from/m.userNsLength) - m.off if index < 0 || index >= m.len { return fmt.Errorf("id %v is out of range", from) } @@ -302,7 +312,7 @@ func (m *UsernsManager) releaseWithLock(pod types.UID) { m.usedBy = n m.removed = 0 } - _ = m.used.Release(int(v/userNsLength) - m.off) + _ = m.used.Release(int(v/m.userNsLength) - m.off) } func (m *UsernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) { diff --git a/pkg/kubelet/userns/userns_manager_test.go b/pkg/kubelet/userns/userns_manager_test.go index b2d0cc85f64..cb66fb854e5 100644 --- a/pkg/kubelet/userns/userns_manager_test.go +++ b/pkg/kubelet/userns/userns_manager_test.go @@ -38,10 +38,11 @@ import ( ) const ( + testUserNsLength = uint32(65536) // skip the first block - minimumMappingUID = userNsLength + minimumMappingUID 
= testUserNsLength // allocate enough space for 2000 user namespaces - mappingLen = userNsLength * 2000 + mappingLen = testUserNsLength * 2000 testMaxPods = 110 ) @@ -52,6 +53,7 @@ type testUserNsPodsManager struct { maxPods int mappingFirstID uint32 mappingLen uint32 + userNsLength uint32 } func (m *testUserNsPodsManager) GetPodDir(podUID types.UID) string { @@ -90,54 +92,91 @@ func (m *testUserNsPodsManager) GetMaxPods() int { return testMaxPods } +func (m *testUserNsPodsManager) GetUserNamespacesIDsPerPod() uint32 { + if m.userNsLength != 0 { + return m.userNsLength + } + return testUserNsLength +} + func TestUserNsManagerAllocate(t *testing.T) { featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.UserNamespacesSupport, true) - testUserNsPodsManager := &testUserNsPodsManager{} - m, err := MakeUserNsManager(testUserNsPodsManager) - require.NoError(t, err) + customUserNsLength := uint32(1048576) - allocated, length, err := m.allocateOne("one") - assert.NoError(t, err) - assert.Equal(t, userNsLength, int(length), "m.isSet(%d).length=%v", allocated, length) - assert.True(t, m.isSet(allocated), "m.isSet(%d)", allocated) - - allocated2, length2, err := m.allocateOne("two") - assert.NoError(t, err) - assert.NotEqual(t, allocated, allocated2, "allocated != allocated2") - assert.Equal(t, length, length2, "length == length2") - - // verify that re-adding the same pod with the same settings won't fail - err = m.record("two", allocated2, length2) - assert.NoError(t, err) - // but it fails if anyting is different - err = m.record("two", allocated2+1, length2) - assert.Error(t, err) - - m.Release("one") - m.Release("two") - assert.False(t, m.isSet(allocated), "m.isSet(%d)", allocated) - assert.False(t, m.isSet(allocated2), "m.nsSet(%d)", allocated2) - - var allocs []uint32 - for i := 0; i < 1000; i++ { - allocated, length, err = m.allocateOne(types.UID(fmt.Sprintf("%d", i))) - assert.Equal(t, userNsLength, int(length), "length is not the 
expected. iter: %v", i) - assert.NoError(t, err) - assert.GreaterOrEqual(t, allocated, uint32(minimumMappingUID)) - // The last ID of the userns range (allocated+userNsLength) should be within bounds. - assert.LessOrEqual(t, allocated, uint32(minimumMappingUID+mappingLen-userNsLength)) - allocs = append(allocs, allocated) + cases := []struct { + name string + userNsLength uint32 + mappingFirstID uint32 + mappingLen uint32 + }{ + { + name: "default", + userNsLength: testUserNsLength, + mappingFirstID: minimumMappingUID, + mappingLen: mappingLen, + }, + { + name: "custom", + userNsLength: customUserNsLength, + mappingFirstID: customUserNsLength, + mappingLen: customUserNsLength * 2000, + }, } - for i, v := range allocs { - assert.True(t, m.isSet(v), "m.isSet(%d) should be true", v) - m.Release(types.UID(fmt.Sprintf("%d", i))) - assert.False(t, m.isSet(v), "m.isSet(%d) should be false", v) - err = m.record(types.UID(fmt.Sprintf("%d", i)), v, userNsLength) - assert.NoError(t, err) - m.Release(types.UID(fmt.Sprintf("%d", i))) - assert.False(t, m.isSet(v), "m.isSet(%d) should be false", v) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + testUserNsPodsManager := &testUserNsPodsManager{ + userNsLength: tc.userNsLength, + mappingFirstID: tc.mappingFirstID, + mappingLen: tc.mappingLen, + } + m, err := MakeUserNsManager(testUserNsPodsManager) + require.NoError(t, err) + + allocated, length, err := m.allocateOne("one") + require.NoError(t, err) + assert.Equal(t, tc.userNsLength, length, "m.isSet(%d).length=%v", allocated, length) + assert.True(t, m.isSet(allocated), "m.isSet(%d)", allocated) + + allocated2, length2, err := m.allocateOne("two") + require.NoError(t, err) + assert.NotEqual(t, allocated, allocated2, "allocated != allocated2") + assert.Equal(t, length, length2, "length == length2") + + // verify that re-adding the same pod with the same settings won't fail + err = m.record("two", allocated2, length2) + require.NoError(t, err) + // but it fails 
if anything is different + err = m.record("two", allocated2+1, length2) + require.Error(t, err) + + m.Release("one") + m.Release("two") + assert.False(t, m.isSet(allocated), "m.isSet(%d)", allocated) + assert.False(t, m.isSet(allocated2), "m.nsSet(%d)", allocated2) + + var allocs []uint32 + for i := 0; i < 1000; i++ { + allocated, length, err = m.allocateOne(types.UID(fmt.Sprintf("%d", i))) + assert.Equal(t, tc.userNsLength, length, "length is not the expected. iter: %v", i) + require.NoError(t, err) + assert.GreaterOrEqual(t, allocated, tc.mappingFirstID) + // The last ID of the userns range (allocated+userNsLength) should be within bounds. + assert.LessOrEqual(t, allocated, tc.mappingFirstID+tc.mappingLen-tc.userNsLength) + allocs = append(allocs, allocated) + } + for i, v := range allocs { + assert.True(t, m.isSet(v), "m.isSet(%d) should be true", v) + m.Release(types.UID(fmt.Sprintf("%d", i))) + assert.False(t, m.isSet(v), "m.isSet(%d) should be false", v) + + err = m.record(types.UID(fmt.Sprintf("%d", i)), v, tc.userNsLength) + require.NoError(t, err) + m.Release(types.UID(fmt.Sprintf("%d", i))) + assert.False(t, m.isSet(v), "m.isSet(%d) should be false", v) + } + }) } } diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index b08cd91ad51..163640b811a 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -924,6 +924,11 @@ type KubeletConfiguration struct { // Default: false // +optional FailCgroupV1 *bool `json:"failCgroupV1,omitempty"` + + // UserNamespaces contains User Namespace configurations. + // +featureGate=UserNamespacesSupport + // +optional + UserNamespaces *UserNamespaces `json:"userNamespaces,omitempty"` } type KubeletAuthorizationMode string @@ -1119,3 +1124,17 @@ type ExecEnvVar struct { Name string `json:"name"` Value string `json:"value"` } + +// UserNamespaces contains User Namespace configurations. 
+type UserNamespaces struct { + // IDsPerPod is the mapping length of UIDs and GIDs. + // The length must be a multiple of 65536, and must be less than 1<<32. + // On non-linux such as windows, only null / absent is allowed. + // + // Changing the value may require recreating all containers on the node. + // + // Default: 65536 + // +featureGate=UserNamespacesSupport + // +optional + IDsPerPod *int64 `json:"idsPerPod,omitempty"` +} diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go index 1c3f792da05..ac0ba7096bc 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go @@ -527,6 +527,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { *out = new(bool) **out = **in } + if in.UserNamespaces != nil { + in, out := &in.UserNamespaces, &out.UserNamespaces + *out = new(UserNamespaces) + (*in).DeepCopyInto(*out) + } return } @@ -684,3 +689,24 @@ func (in *ShutdownGracePeriodByPodPriority) DeepCopy() *ShutdownGracePeriodByPod in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *UserNamespaces) DeepCopyInto(out *UserNamespaces) { + *out = *in + if in.IDsPerPod != nil { + in, out := &in.IDsPerPod, &out.IDsPerPod + *out = new(int64) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UserNamespaces. +func (in *UserNamespaces) DeepCopy() *UserNamespaces { + if in == nil { + return nil + } + out := new(UserNamespaces) + in.DeepCopyInto(out) + return out +}