diff --git a/pkg/kubelet/kubelet_getters.go b/pkg/kubelet/kubelet_getters.go index 4ef51b9791f..e1d00e3ed12 100644 --- a/pkg/kubelet/kubelet_getters.go +++ b/pkg/kubelet/kubelet_getters.go @@ -123,6 +123,11 @@ func (kl *Kubelet) HandlerSupportsUserNamespaces(rtHandler string) (bool, error) return h.SupportsUserNamespaces, nil } +// GetKubeletMappings gets the additional IDs allocated for the Kubelet. +func (kl *Kubelet) GetKubeletMappings() (uint32, uint32, error) { + return kl.getKubeletMappings() +} + // getPodDir returns the full path to the per-pod directory for the pod with // the given UID. func (kl *Kubelet) getPodDir(podUID types.UID) string { diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index fd6dedca2bb..7ad7a68808a 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -19,14 +19,18 @@ package kubelet import ( "bytes" "context" + goerrors "errors" "fmt" "io" "net/http" "net/url" "os" + "os/exec" + "os/user" "path/filepath" "runtime" "sort" + "strconv" "strings" "github.com/google/go-cmp/cmp" @@ -76,8 +80,90 @@ const ( const ( PodInitializing = "PodInitializing" ContainerCreating = "ContainerCreating" + + kubeletUser = "kubelet" ) +// parseGetSubIdsOutput parses the output from the `getsubids` tool, which is used to query subordinate user or group ID ranges for +// a given user or group. getsubids produces a line for each mapping configured. +// Here we expect that there is a single mapping, and the same values are used for the subordinate user and group ID ranges. 
// The output is something like:
//
//	$ getsubids kubelet
//	0: kubelet 65536 2147483648
//	$ getsubids -g kubelet
//	0: kubelet 65536 2147483648
//
// The two numeric fields of that line are returned as (first ID, number of IDs).
// An error is returned when the input does not consist of exactly one line with
// four whitespace-separated fields, when either number is not a base-10 value
// that fits in 32 bits, or when the range would end past the 32-bit ID space.
func parseGetSubIdsOutput(input string) (uint32, uint32, error) {
	lines := strings.Split(strings.Trim(input, "\n"), "\n")
	if len(lines) != 1 {
		return 0, 0, fmt.Errorf("error parsing line %q: it must contain only one line", input)
	}

	// Expected fields: "<index>:", "<owner>", "<first ID>", "<count>".
	parts := strings.Fields(lines[0])
	if len(parts) != 4 {
		return 0, 0, fmt.Errorf("invalid line %q", input)
	}

	// bitSize 32 makes ParseUint reject negative numbers and values above MaxUint32.
	firstID, err := strconv.ParseUint(parts[2], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
	}

	length, err := strconv.ParseUint(parts[3], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
	}

	// Both values are held as uint64 here, so the sum cannot wrap. Each number can be
	// valid on its own while the range they describe still ends beyond the last
	// 32-bit ID; such a range cannot be represented and is rejected.
	if firstID+length > 1<<32 {
		return 0, 0, fmt.Errorf("error parsing line %q: range exceeds the 32-bit ID space", input)
	}

	return uint32(firstID), uint32(length), nil
}

// getKubeletMappings returns the range of IDs that can be used to configure user namespaces.
// If subordinate user or group ID ranges are specified for the kubelet user and the getsubids tool
// is installed, then the single mapping specified both for user and group IDs will be used.
// If the tool is not installed, or there are no IDs configured, the default mapping is returned.
// The default mapping includes the entire IDs range except IDs below 65536.
+func (kl *Kubelet) getKubeletMappings() (uint32, uint32, error) { + // default mappings to return if there is no specific configuration + const defaultFirstID = 1 << 16 + const defaultLen = 1<<32 - defaultFirstID + + if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) { + return defaultFirstID, defaultLen, nil + } + + _, err := user.Lookup(kubeletUser) + if err != nil { + var unknownUserErr user.UnknownUserError + if goerrors.As(err, &unknownUserErr) { + // if the user is not found, we assume that the user is not configured + return defaultFirstID, defaultLen, nil + } + return 0, 0, err + } + + execName := "getsubids" + cmd, err := exec.LookPath(execName) + if err != nil { + if os.IsNotExist(err) { + klog.V(2).InfoS("Could not find executable, default mappings will be used for the user namespaces", "executable", execName, "err", err) + return defaultFirstID, defaultLen, nil + } + return 0, 0, err + } + outUids, err := exec.Command(cmd, kubeletUser).Output() + if err != nil { + return 0, 0, fmt.Errorf("error retrieving additional ids for user %q", kubeletUser) + } + outGids, err := exec.Command(cmd, "-g", kubeletUser).Output() + if err != nil { + return 0, 0, fmt.Errorf("error retrieving additional gids for user %q", kubeletUser) + } + if string(outUids) != string(outGids) { + return 0, 0, fmt.Errorf("mismatched subuids and subgids for user %q", kubeletUser) + } + return parseGetSubIdsOutput(string(outUids)) +} + // Get a list of pods that have data directories. 
func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) { podInfos, err := os.ReadDir(kl.getPodsDir()) diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 90c42dde60f..ae6710c50c6 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -6013,3 +6013,77 @@ func TestGetNonExistentImagePullSecret(t *testing.T) { event := <-fakeRecorder.Events assert.Equal(t, event, expectedEvent) } + +func TestParseGetSubIdsOutput(t *testing.T) { + tests := []struct { + name string + input string + wantFirstID uint32 + wantRangeLen uint32 + wantErr bool + }{ + { + name: "valid", + input: "0: kubelet 65536 2147483648", + wantFirstID: 65536, + wantRangeLen: 2147483648, + }, + { + name: "multiple lines", + input: "0: kubelet 1 2\n1: kubelet 3 4\n", + wantErr: true, + }, + { + name: "wrong format", + input: "0: kubelet 65536", + wantErr: true, + }, + { + name: "non numeric 1", + input: "0: kubelet Foo 65536", + wantErr: true, + }, + { + name: "non numeric 2", + input: "0: kubelet 0 Bar", + wantErr: true, + }, + { + name: "overflow 1", + input: "0: kubelet 4294967296 2147483648", + wantErr: true, + }, + { + name: "overflow 2", + input: "0: kubelet 65536 4294967296", + wantErr: true, + }, + { + name: "negative value 1", + input: "0: kubelet -1 2147483648", + wantErr: true, + }, + { + name: "negative value 2", + input: "0: kubelet 65536 -1", + wantErr: true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + gotFirstID, gotRangeLen, err := parseGetSubIdsOutput(tc.input) + if tc.wantErr { + if err == nil { + t.Errorf("%s: expected error, got nil", tc.name) + } + } else { + if err != nil { + t.Errorf("%s: unexpected error: %v", tc.name, err) + } + if gotFirstID != tc.wantFirstID || gotRangeLen != tc.wantRangeLen { + t.Errorf("%s: got (%d, %d), want (%d, %d)", tc.name, gotFirstID, gotRangeLen, tc.wantFirstID, tc.wantRangeLen) + } + } + }) + } +} diff --git a/pkg/kubelet/userns/userns_manager.go 
b/pkg/kubelet/userns/userns_manager.go index 603dd053906..56a9a8e8ade 100644 --- a/pkg/kubelet/userns/userns_manager.go +++ b/pkg/kubelet/userns/userns_manager.go @@ -19,7 +19,6 @@ package userns import ( "encoding/json" "fmt" - "math" "os" "path/filepath" "sync" @@ -52,6 +51,7 @@ type userNsPodsManager interface { HandlerSupportsUserNamespaces(runtimeHandler string) (bool, error) GetPodDir(podUID types.UID) string ListPodsFromDisk() ([]types.UID, error) + GetKubeletMappings() (uint32, uint32, error) } type UsernsManager struct { @@ -59,7 +59,11 @@ type UsernsManager struct { usedBy map[types.UID]uint32 // Map pod.UID to range used removed int numAllocated int - kl userNsPodsManager + + off int + len int + + kl userNsPodsManager // This protects all members except for kl.anager lock sync.Mutex } @@ -130,16 +134,33 @@ func (m *UsernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) { } func MakeUserNsManager(kl userNsPodsManager) (*UsernsManager, error) { + kubeletMappingID, kubeletMappingLen, err := kl.GetKubeletMappings() + if err != nil { + return nil, err + } + + if kubeletMappingID%userNsLength != 0 { + return nil, fmt.Errorf("kubelet user assigned ID %v is not a multiple of %v", kubeletMappingID, userNsLength) + } + if kubeletMappingID < userNsLength { + // We don't allow to map 0, as security is circumvented. + return nil, fmt.Errorf("kubelet user assigned ID %v must be greater or equal to %v", kubeletMappingID, userNsLength) + } + if kubeletMappingLen%userNsLength != 0 { + return nil, fmt.Errorf("kubelet user assigned IDs length %v is not a multiple of %v", kubeletMappingLen, userNsLength) + } + if kubeletMappingLen/userNsLength < maxPods { + return nil, fmt.Errorf("kubelet user assigned IDs are not enough to support %v pods", maxPods) + } + off := int(kubeletMappingID / userNsLength) + len := int(kubeletMappingLen / userNsLength) + m := UsernsManager{ - // Create a bitArray for all the UID space (2^32). 
- // As a by product of that, no index param to bitArray can be out of bounds (index is uint32). - used: allocator.NewAllocationMap((math.MaxUint32+1)/userNsLength, "user namespaces"), + used: allocator.NewAllocationMap(len, "user namespaces"), usedBy: make(map[types.UID]uint32), kl: kl, - } - // First block is reserved for the host. - if _, err := m.used.Allocate(0); err != nil { - return nil, err + off: off, + len: len, } // do not bother reading the list of pods if user namespaces are not enabled. @@ -184,7 +205,10 @@ func (m *UsernsManager) recordPodMappings(pod types.UID) error { // isSet checks if the specified index is already set. func (m *UsernsManager) isSet(v uint32) bool { - index := int(v / userNsLength) + index := int(v/userNsLength) - m.off + if index < 0 || index >= m.len { + return true + } return m.used.Has(index) } @@ -212,7 +236,7 @@ func (m *UsernsManager) allocateOne(pod types.UID) (firstID uint32, length uint3 klog.V(5).InfoS("new pod user namespace allocation", "podUID", pod) - firstID = uint32(firstZero * userNsLength) + firstID = uint32((firstZero + m.off) * userNsLength) m.usedBy[pod] = firstID return firstID, userNsLength, nil } @@ -229,7 +253,10 @@ func (m *UsernsManager) record(pod types.UID, from, length uint32) (err error) { if found && prevFrom != from { return fmt.Errorf("different user namespace range already used by pod %q", pod) } - index := int(from / userNsLength) + index := int(from/userNsLength) - m.off + if index < 0 || index >= m.len { + return fmt.Errorf("id %v is out of range", from) + } // if the pod wasn't found then verify the range is free. 
if !found && m.used.Has(index) { return fmt.Errorf("range picked for pod %q already taken", pod) @@ -304,7 +331,7 @@ func (m *UsernsManager) releaseWithLock(pod types.UID) { m.usedBy = n m.removed = 0 } - m.used.Release(int(v / userNsLength)) + _ = m.used.Release(int(v/userNsLength) - m.off) } func (m *UsernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) { diff --git a/pkg/kubelet/userns/userns_manager_test.go b/pkg/kubelet/userns/userns_manager_test.go index 8c498955b78..626f743b46d 100644 --- a/pkg/kubelet/userns/userns_manager_test.go +++ b/pkg/kubelet/userns/userns_manager_test.go @@ -34,6 +34,13 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) +const ( + // skip the first block + minimumMappingUID = userNsLength + // allocate enough space for 2000 user namespaces + mappingLen = userNsLength * 2000 +) + type testUserNsPodsManager struct { podDir string podList []types.UID @@ -61,6 +68,10 @@ func (m *testUserNsPodsManager) HandlerSupportsUserNamespaces(runtimeHandler str return m.userns, nil } +func (m *testUserNsPodsManager) GetKubeletMappings() (uint32, uint32, error) { + return minimumMappingUID, mappingLen, nil +} + func TestUserNsManagerAllocate(t *testing.T) { defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.UserNamespacesSupport, true)() @@ -97,6 +108,9 @@ func TestUserNsManagerAllocate(t *testing.T) { allocated, length, err = m.allocateOne(types.UID(fmt.Sprintf("%d", i))) assert.Equal(t, userNsLength, int(length), "length is not the expected. iter: %v", i) assert.NoError(t, err) + assert.True(t, allocated >= minimumMappingUID) + // The last ID of the userns range (allocated+userNsLength) should be within bounds. + assert.True(t, allocated <= minimumMappingUID+mappingLen-userNsLength) allocs = append(allocs, allocated) } for i, v := range allocs {