From 1fde2b884c7110c5e253db7143b24bfd91202c4d Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 29 Apr 2025 17:11:42 +0200 Subject: [PATCH] DRA node: reject static pods which reference ResourceClaims If someone gains the ability to create static pods, they might try to use that ability to run code which gets access to the resources associated with some existing claim which was previously allocated for some other pod. Such an attempt already fails because the claim status tracks which pods are allowed to use the claim, the static pod is not in that list, the node is not authorized to add it, and the kubelet checks that list before starting the pod in https://github.com/kubernetes/kubernetes/blob/195803cde570ad1025a78e36cdbef76bddbc4c33/pkg/kubelet/cm/dra/manager.go#L218-L222. Even if the pod were started, DRA drivers typically manage node-local resources which can already be accessed via such an attack without involving DRA. DRA drivers which manage non-node-local resources have to consider access by a compromised node as part of their threat model. Nonetheless, it is better to not accept static pods which reference ResourceClaims or ResourceClaimTemplates in the first place because there is no valid use case for it. This is done at different levels for defense in depth: - configuration validation in the kubelet - admission checking of node restrictions - API validation Co-authored-by: Jordan Liggitt Code changes by Jordan, with one small change (resourceClaims -> resourceclaims). Unit tests by Patrick. --- pkg/apis/core/validation/validation.go | 7 +++ pkg/apis/core/validation/validation_test.go | 11 +++- pkg/kubelet/config/common.go | 6 ++ pkg/kubelet/config/common_test.go | 56 +++++++++++++++++++ .../admission/noderestriction/admission.go | 4 ++ .../noderestriction/admission_test.go | 9 +++ 6 files changed, 92 insertions(+), 1 deletion(-) diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index fa96a723244..71233f5074a 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -3038,6 +3038,13 @@ func gatherPodResourceClaimNames(claims []core.PodResourceClaim) sets.Set[string } func validatePodResourceClaim(podMeta *metav1.ObjectMeta, claim core.PodResourceClaim, podClaimNames *sets.Set[string], fldPath *field.Path) field.ErrorList { + // static pods don't support resource claims + if podMeta != nil { + if _, ok := podMeta.Annotations[core.MirrorPodAnnotationKey]; ok { + return field.ErrorList{field.Forbidden(field.NewPath(""), "static pods do not support resource claims")} + } + } + var allErrs field.ErrorList if claim.Name == "" { allErrs = append(allErrs, field.Required(fldPath.Child("name"), "")) diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 7a24ceb2608..a2ade572b37 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -25321,6 +25321,8 @@ func TestValidateDynamicResourceAllocation(t *testing.T) { } failureCases := map[string]*core.Pod{ + "static pod with resource claim reference": goodClaimReference, + "static pod with resource claim template": goodClaimTemplate, "pod claim name with prefix": podtest.MakePod("", podtest.SetResourceClaims(core.PodResourceClaim{ Name: "../my-claim", @@ -25450,7 +25452,14 @@ func TestValidateDynamicResourceAllocation(t *testing.T) { }(), } for k, v := range failureCases { - if errs := ValidatePodSpec(&v.Spec, nil, field.NewPath("field"), PodValidationOptions{}); len(errs) == 0 { + podMeta := shortPodName + if strings.HasPrefix(k, "static pod") { + podMeta = podMeta.DeepCopy() + podMeta.Annotations = map[string]string{ + core.MirrorPodAnnotationKey: "True", + } + } + if errs := ValidatePodSpec(&v.Spec, podMeta, field.NewPath("field"), PodValidationOptions{}); len(errs) == 0 { t.Errorf("expected failure for %q", k) } } diff --git a/pkg/kubelet/config/common.go b/pkg/kubelet/config/common.go index 69d6712623d..a73d6372a47 100644 --- a/pkg/kubelet/config/common.go +++ b/pkg/kubelet/config/common.go @@ -106,6 +106,9 @@ type defaultFunc func(pod *api.Pod) error // A static pod tried to use a ClusterTrustBundle projected volume source. var ErrStaticPodTriedToUseClusterTrustBundle = errors.New("static pods may not use ClusterTrustBundle projected volume sources") +// A static pod tried to use a resource claim. +var ErrStaticPodTriedToUseResourceClaims = errors.New("static pods may not use ResourceClaims") + // tryDecodeSinglePod takes data and tries to extract valid Pod config information from it. func tryDecodeSinglePod(data []byte, defaultFn defaultFunc) (parsed bool, pod *v1.Pod, err error) { // JSON is valid YAML, so this should work for everything. @@ -152,6 +155,9 @@ func tryDecodeSinglePod(data []byte, defaultFn defaultFunc) (parsed bool, pod *v } } } + if len(v1Pod.Spec.ResourceClaims) > 0 { + return true, nil, ErrStaticPodTriedToUseResourceClaims + } return true, v1Pod, nil } diff --git a/pkg/kubelet/config/common_test.go b/pkg/kubelet/config/common_test.go index c6025d85f98..e58de0c1c9f 100644 --- a/pkg/kubelet/config/common_test.go +++ b/pkg/kubelet/config/common_test.go @@ -180,6 +180,62 @@ func TestDecodeSinglePodRejectsClusterTrustBundleVolumes(t *testing.T) { } } +func TestDecodeSinglePodRejectsResourceClaims(t *testing.T) { + grace := int64(30) + enableServiceLinks := v1.DefaultEnableServiceLinks + pod := &v1.Pod{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + UID: "12345", + Namespace: "mynamespace", + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyAlways, + DNSPolicy: v1.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []v1.Container{{ + Name: "image", + Image: "test/image", + ImagePullPolicy: "IfNotPresent", + TerminationMessagePath: "/dev/termination-log", + TerminationMessagePolicy: v1.TerminationMessageReadFile, + SecurityContext: securitycontext.ValidSecurityContextWithContainerDefaults(), + Resources: v1.ResourceRequirements{ + Claims: []v1.ResourceClaim{{ + Name: "my-claim", + }}, + }, + }}, + ResourceClaims: []v1.PodResourceClaim{{ + Name: "my-claim", + ResourceClaimName: ptr.To("some-external-claim"), + }}, + SecurityContext: &v1.PodSecurityContext{}, + SchedulerName: v1.DefaultSchedulerName, + EnableServiceLinks: &enableServiceLinks, + }, + Status: v1.PodStatus{ + PodIP: "1.2.3.4", + PodIPs: []v1.PodIP{ + { + IP: "1.2.3.4", + }, + }, + }, + } + json, err := runtime.Encode(clientscheme.Codecs.LegacyCodec(v1.SchemeGroupVersion), pod) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + _, _, err = tryDecodeSinglePod(json, noDefault) + if !errors.Is(err, ErrStaticPodTriedToUseResourceClaims) { + t.Errorf("Got error %q, want %q", err, ErrStaticPodTriedToUseResourceClaims) + } +} + func TestDecodePodList(t *testing.T) { grace := int64(30) enableServiceLinks := v1.DefaultEnableServiceLinks diff --git a/plugin/pkg/admission/noderestriction/admission.go b/plugin/pkg/admission/noderestriction/admission.go index 9bfcbb2ea2c..b85989dbd28 100644 --- a/plugin/pkg/admission/noderestriction/admission.go +++ b/plugin/pkg/admission/noderestriction/admission.go @@ -335,6 +335,10 @@ func (p *Plugin) admitPodCreate(nodeName string, a admission.Attributes) error { } } + if len(pod.Spec.ResourceClaims) > 0 { + return admission.NewForbidden(a, fmt.Errorf("node %q can not create pods that reference resourceclaims", nodeName)) + } + return nil } diff --git a/plugin/pkg/admission/noderestriction/admission_test.go b/plugin/pkg/admission/noderestriction/admission_test.go index 16c84fba777..0cd7c881f1a 100644 --- a/plugin/pkg/admission/noderestriction/admission_test.go +++ b/plugin/pkg/admission/noderestriction/admission_test.go @@ -574,6 +574,9 @@ func Test_nodePlugin_Admit(t *testing.T) { pvcpod, _ := makeTestPod("ns", "mypvcpod", "mynode", true) pvcpod.Spec.Volumes = []api.Volume{{VolumeSource: api.VolumeSource{PersistentVolumeClaim: &api.PersistentVolumeClaimVolumeSource{ClaimName: "foo"}}}} + claimpod, _ := makeTestPod("ns", "myclaimpod", "mynode", true) + claimpod.Spec.ResourceClaims = []api.PodResourceClaim{{Name: "myclaim", ResourceClaimName: pointer.String("myexternalclaim")}} + tests := []admitTestCase{ // Mirror pods bound to us { @@ -1055,6 +1058,12 @@ func Test_nodePlugin_Admit(t *testing.T) { attributes: admission.NewAttributesRecord(pvcpod, nil, podKind, pvcpod.Namespace, pvcpod.Name, podResource, "", admission.Create, &metav1.CreateOptions{}, false, mynode), err: "reference persistentvolumeclaims", }, + { + name: "forbid create of pod referencing resourceclaim", + podsGetter: noExistingPods, + attributes: admission.NewAttributesRecord(claimpod, nil, podKind, claimpod.Namespace, claimpod.Name, podResource, "", admission.Create, &metav1.CreateOptions{}, false, mynode), + err: "reference resourceclaim", + }, // My node object {