From 1db11c07ff22297b67316e10a13f201a62894df6 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 3 Mar 2023 13:33:20 +0100 Subject: [PATCH] node authorizer: limit kubelet access to ResourceClaim objects As discussed during the KEP and code review of dynamic resource allocation for Kubernetes 1.26, to increase security kubelet should only get read access to those ResourceClaim objects that are needed by a pod on the node. This can be enforced easily by the node authorizer: - the names of the objects are defined by the pod status - there is no indirection as with volumes (pod -> pvc -> pv) Normally the graph only gets updated when a pod is not scheduled yet. Resource claim status changes can still happen after that, so they also must trigger an update. --- plugin/pkg/auth/authorizer/node/graph.go | 17 ++++ .../auth/authorizer/node/graph_populator.go | 28 +++++- .../auth/authorizer/node/node_authorizer.go | 23 +++-- .../authorizer/node/node_authorizer_test.go | 86 ++++++++++++++++--- 4 files changed, 130 insertions(+), 24 deletions(-) diff --git a/plugin/pkg/auth/authorizer/node/graph.go b/plugin/pkg/auth/authorizer/node/graph.go index e7e5337a168..85a4b808426 100644 --- a/plugin/pkg/auth/authorizer/node/graph.go +++ b/plugin/pkg/auth/authorizer/node/graph.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/component-helpers/storage/ephemeral" + "k8s.io/dynamic-resource-allocation/resourceclaim" pvutil "k8s.io/kubernetes/pkg/api/v1/persistentvolume" podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/third_party/forked/gonum/graph" @@ -117,6 +118,7 @@ const ( podVertexType pvcVertexType pvVertexType + resourceClaimVertexType secretVertexType vaVertexType serviceAccountVertexType @@ -128,6 +130,7 @@ var vertexTypes = map[vertexType]string{ podVertexType: "pod", pvcVertexType: "pvc", pvVertexType: "pv", + resourceClaimVertexType: "resourceclaim", secretVertexType: "secret", vaVertexType: "volumeattachment", serviceAccountVertexType: "serviceAccount", @@ -393,6 +396,20 @@ func (g *Graph) AddPod(pod *corev1.Pod) { g.addEdgeToDestinationIndex_locked(e) } } + + for _, podResourceClaim := range pod.Spec.ResourceClaims { + claimName, _, err := resourceclaim.Name(pod, &podResourceClaim) + // Do we have a valid claim name? If yes, add an edge that grants + // kubelet access to that claim. An error indicates that a claim + // still needs to be created, nil that intentionally no claim + // was created and never will be because it isn't needed. + if err == nil && claimName != nil { + claimVertex := g.getOrCreateVertex_locked(resourceClaimVertexType, pod.Namespace, *claimName) + e := newDestinationEdge(claimVertex, podVertex, nodeVertex) + g.graph.SetEdge(e) + g.addEdgeToDestinationIndex_locked(e) + } + } } func (g *Graph) DeletePod(name, namespace string) { start := time.Now() diff --git a/plugin/pkg/auth/authorizer/node/graph_populator.go b/plugin/pkg/auth/authorizer/node/graph_populator.go index aff1f80c63c..52a808ef788 100644 --- a/plugin/pkg/auth/authorizer/node/graph_populator.go +++ b/plugin/pkg/auth/authorizer/node/graph_populator.go @@ -78,8 +78,9 @@ func (g *graphPopulator) updatePod(oldObj, obj interface{}) { return } if oldPod, ok := oldObj.(*corev1.Pod); ok && oldPod != nil { - if (pod.Spec.NodeName == oldPod.Spec.NodeName) && (pod.UID == oldPod.UID) { - // Node and uid are unchanged, all object references in the pod spec are immutable + if (pod.Spec.NodeName == oldPod.Spec.NodeName) && (pod.UID == oldPod.UID) && + resourceClaimStatusesEqual(oldPod.Status.ResourceClaimStatuses, pod.Status.ResourceClaimStatuses) { + // Node and uid are unchanged, all object references in the pod spec are immutable respectively unmodified (claim statuses). klog.V(5).Infof("updatePod %s/%s, node unchanged", pod.Namespace, pod.Name) return } @@ -91,6 +92,29 @@ func (g *graphPopulator) updatePod(oldObj, obj interface{}) { klog.V(5).Infof("updatePod %s/%s for node %s completed in %v", pod.Namespace, pod.Name, pod.Spec.NodeName, time.Since(startTime)) } +func resourceClaimStatusesEqual(statusA, statusB []corev1.PodResourceClaimStatus) bool { + if len(statusA) != len(statusB) { + return false + } + // In most cases, status entries only get added once and not modified. + // But this cannot be guaranteed, so for the sake of correctness in all + // cases this code here has to check. + for i := range statusA { + if statusA[i].Name != statusB[i].Name { + return false + } + claimNameA := statusA[i].ResourceClaimName + claimNameB := statusB[i].ResourceClaimName + if (claimNameA == nil) != (claimNameB == nil) { + return false + } + if claimNameA != nil && *claimNameA != *claimNameB { + return false + } + } + return true +} + func (g *graphPopulator) deletePod(obj interface{}) { if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { obj = tombstone.Obj diff --git a/plugin/pkg/auth/authorizer/node/node_authorizer.go b/plugin/pkg/auth/authorizer/node/node_authorizer.go index f3a5ab4339c..b03467ffd73 100644 --- a/plugin/pkg/auth/authorizer/node/node_authorizer.go +++ b/plugin/pkg/auth/authorizer/node/node_authorizer.go @@ -30,6 +30,7 @@ import ( "k8s.io/component-base/featuregate" coordapi "k8s.io/kubernetes/pkg/apis/coordination" api "k8s.io/kubernetes/pkg/apis/core" + resourceapi "k8s.io/kubernetes/pkg/apis/resource" storageapi "k8s.io/kubernetes/pkg/apis/storage" "k8s.io/kubernetes/pkg/auth/nodeidentifier" "k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac" @@ -40,7 +41,7 @@ import ( // NodeAuthorizer authorizes requests from kubelets, with the following logic: // 1. If a request is not from a node (NodeIdentity() returns isNode=false), reject // 2. If a specific node cannot be identified (NodeIdentity() returns nodeName=""), reject -// 3. If a request is for a secret, configmap, persistent volume or persistent volume claim, reject unless the verb is get, and the requested object is related to the requesting node: +// 3. If a request is for a secret, configmap, persistent volume, resource claim, or persistent volume claim, reject unless the verb is get, and the requested object is related to the requesting node: // node <- configmap // node <- pod // node <- pod <- secret @@ -48,6 +49,7 @@ import ( // node <- pod <- pvc // node <- pod <- pvc <- pv // node <- pod <- pvc <- pv <- secret +// node <- pod <- ResourceClaim // 4. For other resources, authorize all nodes uniformly using statically defined rules type NodeAuthorizer struct { graph *Graph @@ -72,14 +74,15 @@ func NewAuthorizer(graph *Graph, identifier nodeidentifier.NodeIdentifier, rules } var ( - configMapResource = api.Resource("configmaps") - secretResource = api.Resource("secrets") - pvcResource = api.Resource("persistentvolumeclaims") - pvResource = api.Resource("persistentvolumes") - vaResource = storageapi.Resource("volumeattachments") - svcAcctResource = api.Resource("serviceaccounts") - leaseResource = coordapi.Resource("leases") - csiNodeResource = storageapi.Resource("csinodes") + configMapResource = api.Resource("configmaps") + secretResource = api.Resource("secrets") + pvcResource = api.Resource("persistentvolumeclaims") + pvResource = api.Resource("persistentvolumes") + resourceClaimResource = resourceapi.Resource("resourceclaims") + vaResource = storageapi.Resource("volumeattachments") + svcAcctResource = api.Resource("serviceaccounts") + leaseResource = coordapi.Resource("leases") + csiNodeResource = storageapi.Resource("csinodes") ) func (r *NodeAuthorizer) RulesFor(user user.Info, namespace string) ([]authorizer.ResourceRuleInfo, []authorizer.NonResourceRuleInfo, bool, error) { @@ -117,6 +120,8 @@ func (r *NodeAuthorizer) Authorize(ctx context.Context, attrs authorizer.Attribu return r.authorizeGet(nodeName, pvcVertexType, attrs) case pvResource: return r.authorizeGet(nodeName, pvVertexType, attrs) + case resourceClaimResource: + return r.authorizeGet(nodeName, resourceClaimVertexType, attrs) case vaResource: return r.authorizeGet(nodeName, vaVertexType, attrs) case svcAcctResource: diff --git a/plugin/pkg/auth/authorizer/node/node_authorizer_test.go b/plugin/pkg/auth/authorizer/node/node_authorizer_test.go index 921a22ca126..b3f02581cee 100644 --- a/plugin/pkg/auth/authorizer/node/node_authorizer_test.go +++ b/plugin/pkg/auth/authorizer/node/node_authorizer_test.go @@ -42,16 +42,19 @@ func TestAuthorizer(t *testing.T) { g := NewGraph() opts := &sampleDataOpts{ - nodes: 2, - namespaces: 2, - podsPerNode: 2, - attachmentsPerNode: 1, - sharedConfigMapsPerPod: 0, - uniqueConfigMapsPerPod: 1, - sharedSecretsPerPod: 1, - uniqueSecretsPerPod: 1, - sharedPVCsPerPod: 0, - uniquePVCsPerPod: 1, + nodes: 2, + namespaces: 2, + podsPerNode: 2, + attachmentsPerNode: 1, + sharedConfigMapsPerPod: 0, + uniqueConfigMapsPerPod: 1, + sharedSecretsPerPod: 1, + uniqueSecretsPerPod: 1, + sharedPVCsPerPod: 0, + uniquePVCsPerPod: 1, + uniqueResourceClaimsPerPod: 1, + uniqueResourceClaimTemplatesPerPod: 1, + uniqueResourceClaimTemplatesWithClaimPerPod: 1, } nodes, pods, pvs, attachments := generate(opts) populate(g, nodes, pods, pvs, attachments) @@ -117,6 +120,16 @@ func TestAuthorizer(t *testing.T) { attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "persistentvolumeclaims", Name: "pvc0-pod0-node0", Namespace: "ns0"}, expect: authorizer.DecisionAllow, }, + { + name: "allowed resource claim", + attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "resourceclaims", APIGroup: "resource.k8s.io", Name: "claim0-pod0-node0-ns0", Namespace: "ns0"}, + expect: authorizer.DecisionAllow, + }, + { + name: "allowed resource claim with template", + attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "resourceclaims", APIGroup: "resource.k8s.io", Name: "generated-claim-pod0-node0-ns0-0", Namespace: "ns0"}, + expect: authorizer.DecisionAllow, + }, { name: "allowed pv", attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "persistentvolumes", Name: "pv0-pod0-node0-ns0", Namespace: ""}, @@ -142,6 +155,16 @@ func TestAuthorizer(t *testing.T) { attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "persistentvolumeclaims", Name: "pvc0-pod0-node1", Namespace: "ns0"}, expect: authorizer.DecisionNoOpinion, }, + { + name: "disallowed resource claim, other node", + attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "resourceclaims", APIGroup: "resource.k8s.io", Name: "claim0-pod0-node1-ns0", Namespace: "ns0"}, + expect: authorizer.DecisionNoOpinion, + }, + { + name: "disallowed resource claim with template", + attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "resourceclaims", APIGroup: "resource.k8s.io", Name: "pod0-node1-claimtemplate0", Namespace: "ns0"}, + expect: authorizer.DecisionNoOpinion, + }, { name: "disallowed pv", attrs: authorizer.AttributesRecord{User: node0, ResourceRequest: true, Verb: "get", Resource: "persistentvolumes", Name: "pv0-pod0-node1-ns0", Namespace: ""}, @@ -468,9 +491,12 @@ type sampleDataOpts struct { sharedSecretsPerPod int sharedPVCsPerPod int - uniqueSecretsPerPod int - uniqueConfigMapsPerPod int - uniquePVCsPerPod int + uniqueSecretsPerPod int + uniqueConfigMapsPerPod int + uniquePVCsPerPod int + uniqueResourceClaimsPerPod int + uniqueResourceClaimTemplatesPerPod int + uniqueResourceClaimTemplatesWithClaimPerPod int } func BenchmarkPopulationAllocation(b *testing.B) { @@ -845,6 +871,40 @@ func generatePod(name, namespace, nodeName, svcAccountName string, opts *sampleD PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ClaimName: pv.Spec.ClaimRef.Name}, }}) } + for i := 0; i < opts.uniqueResourceClaimsPerPod; i++ { + claimName := fmt.Sprintf("claim%d-%s-%s", i, pod.Name, pod.Namespace) + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, corev1.PodResourceClaim{ + Name: fmt.Sprintf("claim%d", i), + Source: corev1.ClaimSource{ + ResourceClaimName: &claimName, + }, + }) + } + for i := 0; i < opts.uniqueResourceClaimTemplatesPerPod; i++ { + claimTemplateName := fmt.Sprintf("claimtemplate%d-%s-%s", i, pod.Name, pod.Namespace) + podClaimName := fmt.Sprintf("claimtemplate%d", i) + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, corev1.PodResourceClaim{ + Name: podClaimName, + Source: corev1.ClaimSource{ + ResourceClaimTemplateName: &claimTemplateName, + }, + }) + } + for i := 0; i < opts.uniqueResourceClaimTemplatesWithClaimPerPod; i++ { + claimTemplateName := fmt.Sprintf("claimtemplate%d-%s-%s", i, pod.Name, pod.Namespace) + podClaimName := fmt.Sprintf("claimtemplate-with-claim%d", i) + claimName := fmt.Sprintf("generated-claim-%s-%s-%d", pod.Name, pod.Namespace, i) + pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims, corev1.PodResourceClaim{ + Name: podClaimName, + Source: corev1.ClaimSource{ + ResourceClaimTemplateName: &claimTemplateName, + }, + }) + pod.Status.ResourceClaimStatuses = append(pod.Status.ResourceClaimStatuses, corev1.PodResourceClaimStatus{ + Name: podClaimName, + ResourceClaimName: &claimName, + }) + } // Choose shared pvcs randomly from shared pvcs in a namespace. subset = randomSubset(opts.sharedPVCsPerPod, opts.sharedPVCsPerNamespace) for _, i := range subset {