diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go
index 9b6c70e0024..61893181ed9 100644
--- a/pkg/kubelet/container/helpers.go
+++ b/pkg/kubelet/container/helpers.go
@@ -59,7 +59,7 @@ type RuntimeHelper interface {
 	GetExtraSupplementalGroupsForPod(pod *v1.Pod) []int64
 
 	// GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
-	GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error)
+	GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error)
 
 	// PrepareDynamicResources prepares resources for a pod.
 	PrepareDynamicResources(pod *v1.Pod) error
diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go
index d2943b9ce91..b7d82ca1773 100644
--- a/pkg/kubelet/container/runtime.go
+++ b/pkg/kubelet/container/runtime.go
@@ -526,6 +526,8 @@ const (
 type RuntimeStatus struct {
 	// Conditions is an array of current observed runtime conditions.
 	Conditions []RuntimeCondition
+	// Handlers is a map of current available handlers
+	Handlers map[string]RuntimeHandler
 }
 
 // GetRuntimeCondition gets a specified runtime condition from the runtime status.
@@ -542,10 +544,28 @@ func (r *RuntimeStatus) GetRuntimeCondition(t RuntimeConditionType) *RuntimeCond
 // String formats the runtime status into human readable string.
 func (r *RuntimeStatus) String() string {
 	var ss []string
+	var sh []string
 	for _, c := range r.Conditions {
 		ss = append(ss, c.String())
 	}
-	return fmt.Sprintf("Runtime Conditions: %s", strings.Join(ss, ", "))
+	for _, h := range r.Handlers {
+		sh = append(sh, h.String())
+	}
+	return fmt.Sprintf("Runtime Conditions: %s; Handlers: %s", strings.Join(ss, ", "), strings.Join(sh, ", "))
+}
+
+// RuntimeHandler contains condition information for the runtime handler.
+type RuntimeHandler struct {
+	// Name is the handler name.
+	Name string
+	// SupportsUserNamespaces is true if the handler has support for
+	// user namespaces.
+	SupportsUserNamespaces bool
+}
+
+// String formats the runtime handler into human readable string.
+func (h *RuntimeHandler) String() string {
+	return fmt.Sprintf("Name=%s SupportsUserNamespaces: %v", h.Name, h.SupportsUserNamespaces)
 }
 
 // RuntimeCondition contains condition information for the runtime.
diff --git a/pkg/kubelet/container/testing/fake_runtime_helper.go b/pkg/kubelet/container/testing/fake_runtime_helper.go
index 36b6f6c3fd3..f52ec20a965 100644
--- a/pkg/kubelet/container/testing/fake_runtime_helper.go
+++ b/pkg/kubelet/container/testing/fake_runtime_helper.go
@@ -68,7 +68,7 @@ func (f *FakeRuntimeHelper) GetExtraSupplementalGroupsForPod(pod *v1.Pod) []int6
 	return nil
 }
 
-func (f *FakeRuntimeHelper) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
+func (f *FakeRuntimeHelper) GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error) {
 	return nil, nil
 }
 
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 2bd35b96611..59c81c21841 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1178,7 +1178,8 @@ type Kubelet struct {
 	updatePodCIDRMux sync.Mutex
 
 	// updateRuntimeMux is a lock on updating runtime, because this path is not thread-safe.
-	// This lock is used by Kubelet.updateRuntimeUp and Kubelet.fastNodeStatusUpdate functions and shouldn't be used anywhere else.
+	// This lock is used by Kubelet.updateRuntimeUp, Kubelet.fastNodeStatusUpdate and
+	// Kubelet.HandlerSupportsUserNamespaces functions and shouldn't be used anywhere else.
 	updateRuntimeMux sync.Mutex
 
 	// nodeLeaseController claims and renews the node lease for this Kubelet
@@ -2898,7 +2899,9 @@ func (kl *Kubelet) updateRuntimeUp() {
 		kl.runtimeState.setRuntimeState(fmt.Errorf("container runtime not ready: %v", runtimeReady))
 		return
 	}
+
 	kl.runtimeState.setRuntimeState(nil)
+	kl.runtimeState.setRuntimeHandlers(s.Handlers)
 	kl.oneTimeInitializer.Do(kl.initializeRuntimeDependentModules)
 	kl.runtimeState.setRuntimeSync(kl.clock.Now())
 }
diff --git a/pkg/kubelet/kubelet_getters.go b/pkg/kubelet/kubelet_getters.go
index d6aa8732fed..4ef51b9791f 100644
--- a/pkg/kubelet/kubelet_getters.go
+++ b/pkg/kubelet/kubelet_getters.go
@@ -109,6 +109,20 @@ func (kl *Kubelet) ListPodsFromDisk() ([]types.UID, error) {
 	return kl.listPodsFromDisk()
 }
 
+// HandlerSupportsUserNamespaces checks whether the specified handler supports
+// user namespaces.
+func (kl *Kubelet) HandlerSupportsUserNamespaces(rtHandler string) (bool, error) {
+	rtHandlers := kl.runtimeState.runtimeHandlers()
+	if rtHandlers == nil {
+		return false, fmt.Errorf("runtime handlers are not set")
+	}
+	h, found := rtHandlers[rtHandler]
+	if !found {
+		return false, fmt.Errorf("the handler %q is not known", rtHandler)
+	}
+	return h.SupportsUserNamespaces, nil
+}
+
 // getPodDir returns the full path to the per-pod directory for the pod with
 // the given UID.
 func (kl *Kubelet) getPodDir(podUID types.UID) string {
diff --git a/pkg/kubelet/kubelet_getters_test.go b/pkg/kubelet/kubelet_getters_test.go
index 423df637d90..f610acaeb21 100644
--- a/pkg/kubelet/kubelet_getters_test.go
+++ b/pkg/kubelet/kubelet_getters_test.go
@@ -21,6 +21,7 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )
 
 func TestKubeletDirs(t *testing.T) {
@@ -99,3 +100,32 @@ func TestKubeletDirs(t *testing.T) {
 	exp = filepath.Join(root, "pods/abc123/volume-subpaths")
 	assert.Equal(t, exp, got)
 }
+
+func TestHandlerSupportsUserNamespaces(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	defer testKubelet.Cleanup()
+	kubelet := testKubelet.kubelet
+
+	kubelet.runtimeState.setRuntimeHandlers(map[string]kubecontainer.RuntimeHandler{
+		"has-support": {
+			Name:                   "has-support",
+			SupportsUserNamespaces: true,
+		},
+		"has-no-support": {
+			Name:                   "has-no-support",
+			SupportsUserNamespaces: false,
+		},
+	})
+
+	got, err := kubelet.HandlerSupportsUserNamespaces("has-support")
+	assert.Equal(t, true, got)
+	assert.NoError(t, err)
+
+	got, err = kubelet.HandlerSupportsUserNamespaces("has-no-support")
+	assert.Equal(t, false, got)
+	assert.NoError(t, err)
+
+	got, err = kubelet.HandlerSupportsUserNamespaces("unknown")
+	assert.Equal(t, false, got)
+	assert.Error(t, err)
+}
diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go
index ff40775c900..fd6dedca2bb 100644
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -425,8 +425,8 @@ func truncatePodHostnameIfNeeded(podName, hostname string) (string, error) {
 }
 
 // GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
-func (kl *Kubelet) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
-	return kl.usernsManager.GetOrCreateUserNamespaceMappings(pod)
+func (kl *Kubelet) GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error) {
+	return kl.usernsManager.GetOrCreateUserNamespaceMappings(pod, runtimeHandler)
 }
 
 // GeneratePodHostNameAndDomain creates a hostname and domain name for a pod,
diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go
index bb302bc5b02..50261972236 100644
--- a/pkg/kubelet/kuberuntime/helpers.go
+++ b/pkg/kubelet/kuberuntime/helpers.go
@@ -200,7 +200,7 @@ func parsePodUIDFromLogsDirectory(name string) types.UID {
 }
 
 // toKubeRuntimeStatus converts the runtimeapi.RuntimeStatus to kubecontainer.RuntimeStatus.
-func toKubeRuntimeStatus(status *runtimeapi.RuntimeStatus) *kubecontainer.RuntimeStatus {
+func toKubeRuntimeStatus(status *runtimeapi.RuntimeStatus, handlers []*runtimeapi.RuntimeHandler) *kubecontainer.RuntimeStatus {
 	conditions := []kubecontainer.RuntimeCondition{}
 	for _, c := range status.GetConditions() {
 		conditions = append(conditions, kubecontainer.RuntimeCondition{
@@ -210,7 +210,18 @@ func toKubeRuntimeStatus(status *runtimeapi.RuntimeStatus) *kubecontainer.Runtim
 			Message: c.Message,
 		})
 	}
-	return &kubecontainer.RuntimeStatus{Conditions: conditions}
+	retHandlers := make(map[string]kubecontainer.RuntimeHandler)
+	for _, h := range handlers {
+		supportsUserns := false
+		if h.Features != nil {
+			supportsUserns = h.Features.UserNamespaces
+		}
+		retHandlers[h.Name] = kubecontainer.RuntimeHandler{
+			Name:                   h.Name,
+			SupportsUserNamespaces: supportsUserns,
+		}
+	}
+	return &kubecontainer.RuntimeStatus{Conditions: conditions, Handlers: retHandlers}
 }
 
 func fieldSeccompProfile(scmp *v1.SeccompProfile, profileRootPath string, fallbackToRuntimeDefault bool) (*runtimeapi.SecurityProfile, error) {
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index f833c28e426..688c5a843fd 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -353,7 +353,7 @@ func (m *kubeGenericRuntimeManager) Status(ctx context.Context) (*kubecontainer.
 	if resp.GetStatus() == nil {
 		return nil, errors.New("runtime status is nil")
 	}
-	return toKubeRuntimeStatus(resp.GetStatus()), nil
+	return toKubeRuntimeStatus(resp.GetStatus(), resp.GetRuntimeHandlers()), nil
 }
 
 // GetPods returns a list of containers grouped by pods. The boolean parameter
diff --git a/pkg/kubelet/kuberuntime/util/util.go b/pkg/kubelet/kuberuntime/util/util.go
index af61e17567e..c01f82e83c8 100644
--- a/pkg/kubelet/kuberuntime/util/util.go
+++ b/pkg/kubelet/kuberuntime/util/util.go
@@ -100,7 +100,11 @@ func PidNamespaceForPod(pod *v1.Pod) runtimeapi.NamespaceMode {
 // namespacesForPod returns the runtimeapi.NamespaceOption for a given pod.
 // An empty or nil pod can be used to get the namespace defaults for v1.Pod.
 func NamespacesForPod(pod *v1.Pod, runtimeHelper kubecontainer.RuntimeHelper) (*runtimeapi.NamespaceOption, error) {
-	userNs, err := runtimeHelper.GetOrCreateUserNamespaceMappings(pod)
+	runtimeHandler := ""
+	if pod != nil && pod.Spec.RuntimeClassName != nil {
+		runtimeHandler = *pod.Spec.RuntimeClassName
+	}
+	userNs, err := runtimeHelper.GetOrCreateUserNamespaceMappings(pod, runtimeHandler)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/kubelet/runtime.go b/pkg/kubelet/runtime.go
index 9ee5ece1ff4..cc16dfdcd52 100644
--- a/pkg/kubelet/runtime.go
+++ b/pkg/kubelet/runtime.go
@@ -23,6 +23,7 @@ import (
 	"time"
 
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
+	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )
 
 type runtimeState struct {
@@ -34,6 +35,7 @@ type runtimeState struct {
 	storageError error
 	cidr         string
 	healthChecks []*healthCheck
+	rtHandlers   map[string]kubecontainer.RuntimeHandler
 }
 
 // A health check function should be efficient and not rely on external
@@ -69,6 +71,18 @@ func (s *runtimeState) setRuntimeState(err error) {
 	s.runtimeError = err
 }
 
+func (s *runtimeState) setRuntimeHandlers(rtHandlers map[string]kubecontainer.RuntimeHandler) {
+	s.Lock()
+	defer s.Unlock()
+	s.rtHandlers = rtHandlers
+}
+
+func (s *runtimeState) runtimeHandlers() map[string]kubecontainer.RuntimeHandler {
+	s.RLock()
+	defer s.RUnlock()
+	return s.rtHandlers
+}
+
 func (s *runtimeState) setStorageState(err error) {
 	s.Lock()
 	defer s.Unlock()
diff --git a/pkg/kubelet/userns/userns_manager.go b/pkg/kubelet/userns/userns_manager.go
index 8e333de7905..603dd053906 100644
--- a/pkg/kubelet/userns/userns_manager.go
+++ b/pkg/kubelet/userns/userns_manager.go
@@ -49,6 +49,7 @@ const maxPods = 1024
 const mapReInitializeThreshold = 1000
 
 type userNsPodsManager interface {
+	HandlerSupportsUserNamespaces(runtimeHandler string) (bool, error)
 	GetPodDir(podUID types.UID) string
 	ListPodsFromDisk() ([]types.UID, error)
 }
@@ -379,20 +380,41 @@ func (m *UsernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err err
 }
 
 // GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
-func (m *UsernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
-	if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) {
+func (m *UsernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error) {
+	featureEnabled := utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport)
+
+	if pod == nil || pod.Spec.HostUsers == nil {
+		// if the feature is enabled, specify to use the node mode...
+		if featureEnabled {
+			return &runtimeapi.UserNamespace{
+				Mode: runtimeapi.NamespaceMode_NODE,
+			}, nil
+		}
+		// ...otherwise don't even specify it
 		return nil, nil
 	}
-
-	m.lock.Lock()
-	defer m.lock.Unlock()
-
-	if pod.Spec.HostUsers == nil || *pod.Spec.HostUsers {
+	// pod.Spec.HostUsers is set to true/false
+	if !featureEnabled {
+		return nil, fmt.Errorf("the feature gate %q is disabled: can't set spec.HostUsers", features.UserNamespacesSupport)
+	}
+	if *pod.Spec.HostUsers {
 		return &runtimeapi.UserNamespace{
 			Mode: runtimeapi.NamespaceMode_NODE,
 		}, nil
 	}
 
+	// From here onwards, hostUsers=false and the feature gate is enabled.
+
+	// if the pod requested a user namespace and the runtime doesn't support user namespaces then return an error.
+	if handlerSupportsUserns, err := m.kl.HandlerSupportsUserNamespaces(runtimeHandler); err != nil {
+		return nil, err
+	} else if !handlerSupportsUserns {
+		return nil, fmt.Errorf("RuntimeClass handler %q does not support user namespaces", runtimeHandler)
+	}
+
+	m.lock.Lock()
+	defer m.lock.Unlock()
+
 	content, err := m.readMappingsFromFile(pod.UID)
 	if err != nil && err != utilstore.ErrKeyNotFound {
 		return nil, err
diff --git a/pkg/kubelet/userns/userns_manager_disabled_test.go b/pkg/kubelet/userns/userns_manager_disabled_test.go
index 1da50867b16..8f0a1a20215 100644
--- a/pkg/kubelet/userns/userns_manager_disabled_test.go
+++ b/pkg/kubelet/userns/userns_manager_disabled_test.go
@@ -21,6 +21,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
+	v1 "k8s.io/api/core/v1"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	pkgfeatures "k8s.io/kubernetes/pkg/features"
@@ -49,13 +50,61 @@ func TestReleaseDisabled(t *testing.T) {
 func TestGetOrCreateUserNamespaceMappingsDisabled(t *testing.T) {
 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.UserNamespacesSupport, false)()
 
-	testUserNsPodsManager := &testUserNsPodsManager{}
-	m, err := MakeUserNsManager(testUserNsPodsManager)
-	require.NoError(t, err)
+	trueVal := true
+	falseVal := false
 
-	userns, err := m.GetOrCreateUserNamespaceMappings(nil)
-	assert.NoError(t, err)
-	assert.Nil(t, userns)
+	tests := []struct {
+		name    string
+		pod     *v1.Pod
+		success bool
+	}{
+		{
+			name:    "pod is nil",
+			pod:     nil,
+			success: true,
+		},
+		{
+			name: "hostUsers is nil",
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					HostUsers: nil,
+				},
+			},
+			success: true,
+		},
+		{
+			name: "hostUsers is true",
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					HostUsers: &trueVal,
+				},
+			},
+		},
+		{
+			name: "hostUsers is false",
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					HostUsers: &falseVal,
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			testUserNsPodsManager := &testUserNsPodsManager{}
+			m, err := MakeUserNsManager(testUserNsPodsManager)
+			require.NoError(t, err)
+
+			userns, err := m.GetOrCreateUserNamespaceMappings(test.pod, "")
+			assert.Nil(t, userns)
+			if test.success {
+				assert.NoError(t, err)
+			} else {
+				assert.Error(t, err)
+			}
+		})
+	}
 }
 
 func TestCleanupOrphanedPodUsernsAllocationsDisabled(t *testing.T) {
diff --git a/pkg/kubelet/userns/userns_manager_test.go b/pkg/kubelet/userns/userns_manager_test.go
index 6d2d9d6f1f6..8c498955b78 100644
--- a/pkg/kubelet/userns/userns_manager_test.go
+++ b/pkg/kubelet/userns/userns_manager_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package userns
 
 import (
+	"errors"
 	"fmt"
 	"os"
 	"testing"
@@ -36,6 +37,7 @@ import (
 type testUserNsPodsManager struct {
 	podDir  string
 	podList []types.UID
+	userns  bool
 }
 
 func (m *testUserNsPodsManager) GetPodDir(podUID types.UID) string {
@@ -52,6 +54,13 @@ func (m *testUserNsPodsManager) ListPodsFromDisk() ([]types.UID, error) {
 	return m.podList, nil
 }
 
+func (m *testUserNsPodsManager) HandlerSupportsUserNamespaces(runtimeHandler string) (bool, error) {
+	if runtimeHandler == "error" {
+		return false, errors.New("unknown runtime")
+	}
+	return m.userns, nil
+}
+
 func TestUserNsManagerAllocate(t *testing.T) {
 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.UserNamespacesSupport, true)()
 
@@ -192,10 +201,12 @@ func TestGetOrCreateUserNamespaceMappings(t *testing.T) {
 	falseVal := false
 
 	cases := []struct {
-		name    string
-		pod     *v1.Pod
-		expMode runtimeapi.NamespaceMode
-		success bool
+		name           string
+		pod            *v1.Pod
+		expMode        runtimeapi.NamespaceMode
+		runtimeUserns  bool
+		runtimeHandler string
+		success        bool
 	}{
 		{
 			name:    "no user namespace",
@@ -203,6 +214,12 @@ func TestGetOrCreateUserNamespaceMappings(t *testing.T) {
 			expMode: runtimeapi.NamespaceMode_NODE,
 			success: true,
 		},
+		{
+			name:    "nil pod",
+			pod:     nil,
+			expMode: runtimeapi.NamespaceMode_NODE,
+			success: true,
+		},
 		{
 			name: "opt-in to host user namespace",
 			pod: &v1.Pod{
@@ -220,19 +237,42 @@ func TestGetOrCreateUserNamespaceMappings(t *testing.T) {
 					HostUsers: &falseVal,
 				},
 			},
-			expMode: runtimeapi.NamespaceMode_POD,
-			success: true,
+			expMode:       runtimeapi.NamespaceMode_POD,
+			runtimeUserns: true,
+			success:       true,
+		},
+		{
+			name: "user namespace, but no runtime support",
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					HostUsers: &falseVal,
+				},
+			},
+			runtimeUserns: false,
+		},
+		{
+			name: "user namespace, but runtime returns error",
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					HostUsers: &falseVal,
+				},
+			},
+			// This handler name makes the fake runtime return an error.
+			runtimeHandler: "error",
 		},
 	}
 
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			// These tests will create the userns file, so use an existing podDir.
-			testUserNsPodsManager := &testUserNsPodsManager{podDir: t.TempDir()}
+			testUserNsPodsManager := &testUserNsPodsManager{
+				podDir: t.TempDir(),
+				userns: tc.runtimeUserns,
+			}
 			m, err := MakeUserNsManager(testUserNsPodsManager)
 			assert.NoError(t, err)
 
-			userns, err := m.GetOrCreateUserNamespaceMappings(tc.pod)
+			userns, err := m.GetOrCreateUserNamespaceMappings(tc.pod, tc.runtimeHandler)
 			if (tc.success && err != nil) || (!tc.success && err == nil) {
 				t.Errorf("expected success: %v but got error: %v", tc.success, err)
 			}