diff --git a/test/e2e/dra/deploy.go b/test/e2e/dra/deploy.go
index 8c5d86c09d4..61682914365 100644
--- a/test/e2e/dra/deploy.go
+++ b/test/e2e/dra/deploy.go
@@ -19,6 +19,7 @@ package dra
 import (
 	"bytes"
 	"context"
+	_ "embed"
 	"errors"
 	"fmt"
 	"net"
@@ -38,10 +39,19 @@ import (
 	v1 "k8s.io/api/core/v1"
 	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/selection"
+	"k8s.io/apiserver/pkg/authentication/serviceaccount"
+	"k8s.io/client-go/discovery/cached/memory"
 	resourceapiinformer "k8s.io/client-go/informers/resource/v1alpha2"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/restmapper"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/dynamic-resource-allocation/kubeletplugin"
 	"k8s.io/klog/v2"
@@ -52,6 +62,7 @@ import (
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	"k8s.io/kubernetes/test/e2e/storage/drivers/proxy"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
+	"sigs.k8s.io/yaml"
 )
 
 const (
@@ -63,10 +74,14 @@ type Nodes struct {
 	NodeNames []string
 }
 
+//go:embed test-driver/deploy/example/plugin-permissions.yaml
+var pluginPermissions string
+
 // NewNodes selects nodes to run the test on.
 func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
 	nodes := &Nodes{}
 	ginkgo.BeforeEach(func(ctx context.Context) {
+		ginkgo.By("selecting nodes")
 
 		// The kubelet plugin is harder. We deploy the builtin manifest
 		// after patching in the driver name and all nodes on which we
@@ -166,15 +181,19 @@ type MethodInstance struct {
 }
 
 type Driver struct {
-	f       *framework.Framework
-	ctx     context.Context
-	cleanup []func() // executed first-in-first-out
-	wg      sync.WaitGroup
+	f                  *framework.Framework
+	ctx                context.Context
+	cleanup            []func() // executed first-in-first-out
+	wg                 sync.WaitGroup
+	serviceAccountName string
 
 	NameSuffix string
 	Controller *app.ExampleController
 	Name       string
-	Nodes      map[string]*app.ExamplePlugin
+
+	// Nodes contains entries for each node selected for a test when the test runs.
+	// In addition, there is one entry for a fictional node.
+	Nodes map[string]KubeletPlugin
 
 	parameterMode     parameterMode
 	parameterAPIGroup string
@@ -189,6 +208,11 @@ type Driver struct {
 	callCounts map[MethodInstance]int64
 }
 
+type KubeletPlugin struct {
+	*app.ExamplePlugin
+	ClientSet kubernetes.Interface
+}
+
 type parameterMode string
 
 const (
@@ -199,7 +223,7 @@ const (
 func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 	ginkgo.By(fmt.Sprintf("deploying driver on nodes %v", nodes.NodeNames))
 
-	d.Nodes = map[string]*app.ExamplePlugin{}
+	d.Nodes = make(map[string]KubeletPlugin)
 
 	d.Name = d.f.UniqueName + d.NameSuffix + ".k8s.io"
 	resources.DriverName = d.Name
@@ -250,6 +274,13 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 		framework.Failf("unknown test driver parameter mode: %s", d.parameterMode)
 	}
 
+	// Create service account and corresponding RBAC rules.
+	d.serviceAccountName = "dra-kubelet-plugin-" + d.Name + "-service-account"
+	content := pluginPermissions
+	content = strings.ReplaceAll(content, "dra-kubelet-plugin-namespace", d.f.Namespace.Name)
+	content = strings.ReplaceAll(content, "dra-kubelet-plugin", "dra-kubelet-plugin-"+d.Name)
+	d.createFromYAML(ctx, []byte(content), d.f.Namespace.Name)
+
 	instanceKey := "app.kubernetes.io/instance"
 	rsName := ""
 	draAddr := path.Join(framework.TestContext.KubeletRootDir, "plugins", d.Name+".sock")
@@ -262,6 +293,7 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 			item.Spec.Replicas = &numNodes
 			item.Spec.Selector.MatchLabels[instanceKey] = d.Name
 			item.Spec.Template.Labels[instanceKey] = d.Name
+			item.Spec.Template.Spec.ServiceAccountName = d.serviceAccountName
 			item.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution[0].LabelSelector.MatchLabels[instanceKey] = d.Name
 			item.Spec.Template.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{
 				RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
@@ -305,15 +337,31 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 	framework.ExpectNoError(err, "list proxy pods")
 	gomega.Expect(numNodes).To(gomega.Equal(int32(len(pods.Items))), "number of proxy pods")
 
-	// Run registar and plugin for each of the pods.
+	// Run registrar and plugin for each of the pods.
 	for _, pod := range pods.Items {
 		// Need a local variable, not the loop variable, for the anonymous
 		// callback functions below.
 		pod := pod
 		nodename := pod.Spec.NodeName
+
+		// Authenticate the plugin so that it has the exact same
+		// permissions as the daemonset pod. This includes RBAC and a
+		// validating admission policy which limits writes to per-node
+		// ResourceSlices.
+		//
+		// We could retrieve
+		// /var/run/secrets/kubernetes.io/serviceaccount/token from
+		// each pod and use it. That would check that
+		// ServiceAccountTokenNodeBindingValidation works. But that's
+		// better covered by a test owned by SIG Auth (like the one in
+		// https://github.com/kubernetes/kubernetes/pull/124711).
+		//
+		// Here we merely use impersonation, which is faster.
+		driverClient := d.impersonateKubeletPlugin(&pod)
+
 		logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", pod.Spec.NodeName, "pod", klog.KObj(&pod))
 		loggerCtx := klog.NewContext(ctx, logger)
-		plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, d.f.ClientSet, nodename,
+		plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, driverClient, nodename,
 			app.FileOperations{
 				Create: func(name string, content []byte) error {
 					klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
@@ -342,7 +390,7 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 			// Depends on cancel being called first.
 			plugin.Stop()
 		})
-		d.Nodes[nodename] = plugin
+		d.Nodes[nodename] = KubeletPlugin{ExamplePlugin: plugin, ClientSet: driverClient}
 	}
 
 	// Wait for registration.
@@ -359,6 +407,26 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
 	}).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "hosts where the plugin has not been registered yet")
 }
 
+func (d *Driver) impersonateKubeletPlugin(pod *v1.Pod) kubernetes.Interface {
+	ginkgo.GinkgoHelper()
+	driverUserInfo := (&serviceaccount.ServiceAccountInfo{
+		Name:      d.serviceAccountName,
+		Namespace: pod.Namespace,
+		NodeName:  pod.Spec.NodeName,
+		PodName:   pod.Name,
+		PodUID:    string(pod.UID),
+	}).UserInfo()
+	driverClientConfig := d.f.ClientConfig()
+	driverClientConfig.Impersonate = rest.ImpersonationConfig{
+		UserName: driverUserInfo.GetName(),
+		Groups:   driverUserInfo.GetGroups(),
+		Extra:    driverUserInfo.GetExtra(),
+	}
+	driverClient, err := kubernetes.NewForConfig(driverClientConfig)
+	framework.ExpectNoError(err, "create client for driver")
+	return driverClient
+}
+
 func (d *Driver) createFile(pod *v1.Pod, name string, content []byte) error {
 	buffer := bytes.NewBuffer(content)
 	// Writing the content can be slow. Better create a temporary file and
@@ -375,6 +443,57 @@ func (d *Driver) removeFile(pod *v1.Pod, name string) error {
 	return d.podIO(pod).RemoveAll(name)
 }
 
+func (d *Driver) createFromYAML(ctx context.Context, content []byte, namespace string) {
+	// Not caching the discovery result isn't very efficient, but good enough.
+	discoveryCache := memory.NewMemCacheClient(d.f.ClientSet.Discovery())
+	restMapper := restmapper.NewDeferredDiscoveryRESTMapper(discoveryCache)
+
+	for _, content := range bytes.Split(content, []byte("---\n")) {
+		if len(content) == 0 {
+			continue
+		}
+
+		var obj *unstructured.Unstructured
+		framework.ExpectNoError(yaml.UnmarshalStrict(content, &obj), fmt.Sprintf("Full YAML:\n%s\n", string(content)))
+
+		gv, err := schema.ParseGroupVersion(obj.GetAPIVersion())
+		framework.ExpectNoError(err, fmt.Sprintf("extract group+version from object %q", klog.KObj(obj)))
+		gk := schema.GroupKind{Group: gv.Group, Kind: obj.GetKind()}
+
+		mapping, err := restMapper.RESTMapping(gk, gv.Version)
+		framework.ExpectNoError(err, fmt.Sprintf("map %q to resource", gk))
+
+		resourceClient := d.f.DynamicClient.Resource(mapping.Resource)
+		options := metav1.CreateOptions{
+			// If the YAML input is invalid, then we want the
+			// apiserver to tell us via an error. This can
+			// happen because decoding into an unstructured object
+			// doesn't validate.
+			FieldValidation: "Strict",
+		}
+		switch mapping.Scope.Name() {
+		case meta.RESTScopeNameRoot:
+			_, err = resourceClient.Create(ctx, obj, options)
+		case meta.RESTScopeNameNamespace:
+			if namespace == "" {
+				framework.Failf("need namespace for object type %s", gk)
+			}
+			_, err = resourceClient.Namespace(namespace).Create(ctx, obj, options)
+		}
+		framework.ExpectNoError(err, "create object")
+		ginkgo.DeferCleanup(func(ctx context.Context) {
+			del := resourceClient.Delete
+			if mapping.Scope.Name() == meta.RESTScopeNameNamespace {
+				del = resourceClient.Namespace(namespace).Delete
+			}
+			err := del(ctx, obj.GetName(), metav1.DeleteOptions{})
+			if !apierrors.IsNotFound(err) {
+				framework.ExpectNoError(err, fmt.Sprintf("deleting %s.%s %s", obj.GetKind(), obj.GetAPIVersion(), klog.KObj(obj)))
+			}
+		})
+	}
+}
+
 func (d *Driver) podIO(pod *v1.Pod) proxy.PodDirIO {
 	logger := klog.Background()
 	return proxy.PodDirIO{
diff --git a/test/e2e/dra/dra.go b/test/e2e/dra/dra.go
index 1c507699636..1ac7e5b6e75 100644
--- a/test/e2e/dra/dra.go
+++ b/test/e2e/dra/dra.go
@@ -29,6 +29,7 @@ import (
 	"github.com/onsi/gomega"
 	"github.com/onsi/gomega/gcustom"
 	"github.com/onsi/gomega/gstruct"
+	"github.com/onsi/gomega/types"
 
 	v1 "k8s.io/api/core/v1"
 	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
@@ -891,6 +892,96 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 		driver := NewDriver(f, nodes, perNode(1, nodes))
 		driver.parameterMode = parameterModeStructured
 
+		f.It("must apply per-node permission checks", func(ctx context.Context) {
+			// All of the operations use the client set of a kubelet plugin for
+			// a fictional pod on a fictional node, neither of which exists, so
+			// nothing interferes when we actually manage to create a slice.
+			fictionalNodeName := "dra-fictional-node"
+			gomega.Expect(nodes.NodeNames).NotTo(gomega.ContainElement(fictionalNodeName))
+			fictionalNodeClient := driver.impersonateKubeletPlugin(&v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      fictionalNodeName + "-dra-plugin",
+					Namespace: f.Namespace.Name,
+					UID:       "12345",
+				},
+				Spec: v1.PodSpec{
+					NodeName: fictionalNodeName,
+				},
+			})
+
+			// This is for some actual node in the cluster.
+			realNodeName := nodes.NodeNames[0]
+			realNodeClient := driver.Nodes[realNodeName].ClientSet
+
+			// This is the slice that we try to create. It needs to be deleted
+			// after testing, if it still exists at that time.
+			fictionalNodeSlice := &resourcev1alpha2.ResourceSlice{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: fictionalNodeName + "-slice",
+				},
+				NodeName:   fictionalNodeName,
+				DriverName: "dra.example.com",
+				ResourceModel: resourcev1alpha2.ResourceModel{
+					NamedResources: &resourcev1alpha2.NamedResourcesResources{},
+				},
+			}
+			ginkgo.DeferCleanup(func(ctx context.Context) {
+				err := f.ClientSet.ResourceV1alpha2().ResourceSlices().Delete(ctx, fictionalNodeSlice.Name, metav1.DeleteOptions{})
+				if !apierrors.IsNotFound(err) {
+					framework.ExpectNoError(err)
+				}
+			})
+
+			// Message from test-driver/deploy/example/plugin-permissions.yaml
+			matchVAPDeniedError := gomega.MatchError(gomega.ContainSubstring("may only modify resourceslices that belong to the node the pod is running on"))
+
+			mustCreate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice) *resourcev1alpha2.ResourceSlice {
+				ginkgo.GinkgoHelper()
+				slice, err := clientSet.ResourceV1alpha2().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
+				framework.ExpectNoError(err, fmt.Sprintf("CREATE: %s + %s", clientName, slice.Name))
+				return slice
+			}
+			mustUpdate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice) *resourcev1alpha2.ResourceSlice {
+				ginkgo.GinkgoHelper()
+				slice, err := clientSet.ResourceV1alpha2().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{})
+				framework.ExpectNoError(err, fmt.Sprintf("UPDATE: %s + %s", clientName, slice.Name))
+				return slice
+			}
+			mustDelete := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice) {
+				ginkgo.GinkgoHelper()
+				err := clientSet.ResourceV1alpha2().ResourceSlices().Delete(ctx, slice.Name, metav1.DeleteOptions{})
+				framework.ExpectNoError(err, fmt.Sprintf("DELETE: %s + %s", clientName, slice.Name))
+			}
+			mustFailToCreate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice, matchError types.GomegaMatcher) {
+				ginkgo.GinkgoHelper()
+				_, err := clientSet.ResourceV1alpha2().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
+				gomega.Expect(err).To(matchError, fmt.Sprintf("CREATE: %s + %s", clientName, slice.Name))
+			}
+			mustFailToUpdate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice, matchError types.GomegaMatcher) {
+				ginkgo.GinkgoHelper()
+				_, err := clientSet.ResourceV1alpha2().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{})
+				gomega.Expect(err).To(matchError, fmt.Sprintf("UPDATE: %s + %s", clientName, slice.Name))
+			}
+			mustFailToDelete := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice, matchError types.GomegaMatcher) {
+				ginkgo.GinkgoHelper()
+				err := clientSet.ResourceV1alpha2().ResourceSlices().Delete(ctx, slice.Name, metav1.DeleteOptions{})
+				gomega.Expect(err).To(matchError, fmt.Sprintf("DELETE: %s + %s", clientName, slice.Name))
+			}
+
+			// Create with different clients, keeping the successfully created slice for the checks below.
+			mustFailToCreate(realNodeClient, "real plugin", fictionalNodeSlice, matchVAPDeniedError)
+			createdFictionalNodeSlice := mustCreate(fictionalNodeClient, "fictional plugin", fictionalNodeSlice)
+
+			// Update with different clients.
+			mustFailToUpdate(realNodeClient, "real plugin", createdFictionalNodeSlice, matchVAPDeniedError)
+			createdFictionalNodeSlice = mustUpdate(fictionalNodeClient, "fictional plugin", createdFictionalNodeSlice)
+			createdFictionalNodeSlice = mustUpdate(f.ClientSet, "admin", createdFictionalNodeSlice)
+
+			// Delete with different clients.
+			mustFailToDelete(realNodeClient, "real plugin", createdFictionalNodeSlice, matchVAPDeniedError)
+			mustDelete(fictionalNodeClient, "fictional plugin", createdFictionalNodeSlice)
+		})
+
 		f.It("must manage ResourceSlices", f.WithSlow(), func(ctx context.Context) {
 			driverName := driver.Name
 
@@ -1100,6 +1191,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 		b := newBuilder(f, driver)
 		preScheduledTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
 		claimTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
+
 	})
 })
diff --git a/test/e2e/dra/test-driver/deploy/example/plugin-permissions.yaml b/test/e2e/dra/test-driver/deploy/example/plugin-permissions.yaml
new file mode 100644
index 00000000000..80e12ee5e4c
--- /dev/null
+++ b/test/e2e/dra/test-driver/deploy/example/plugin-permissions.yaml
@@ -0,0 +1,84 @@
+# Real driver deployments must replace all occurrences of "dra-kubelet-plugin"
+# with something specific to their driver.
+
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: dra-kubelet-plugin-service-account
+  namespace: dra-kubelet-plugin-namespace
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: dra-kubelet-plugin-role
+rules:
+- apiGroups: ["resource.k8s.io"]
+  resources: ["resourceclaims"]
+  verbs: ["get"]
+- apiGroups: [""]
+  resources: ["nodes"]
+  verbs: ["get"]
+- apiGroups: ["resource.k8s.io"]
+  resources: ["resourceslices"]
+  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: dra-kubelet-plugin-role-binding
+subjects:
+- kind: ServiceAccount
+  name: dra-kubelet-plugin-service-account
+  namespace: dra-kubelet-plugin-namespace
+roleRef:
+  kind: ClusterRole
+  name: dra-kubelet-plugin-role
+  apiGroup: rbac.authorization.k8s.io
+---
+# This ValidatingAdmissionPolicy is specific to the DRA driver's kubelet plugin
+# because it checks the ServiceAccount defined for it above. An admin could
+# also define a single policy for all drivers.
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingAdmissionPolicy
+metadata:
+  name: resourceslices-policy-dra-kubelet-plugin
+spec:
+  failurePolicy: Fail
+  matchConstraints:
+    resourceRules:
+    - apiGroups: ["resource.k8s.io"]
+      apiVersions: ["v1alpha2"]
+      operations: ["CREATE", "UPDATE", "DELETE"]
+      resources: ["resourceslices"]
+  variables:
+  - name: hasNodeName
+    expression: >-
+      "authentication.kubernetes.io/node-name" in request.userInfo.extra
+  - name: isKubeletPlugin
+    expression: >-
+      request.userInfo.username == "system:serviceaccount:dra-kubelet-plugin-namespace:dra-kubelet-plugin-service-account"
+  - name: objectNodeName
+    expression: >-
+      (request.operation == "DELETE" ? oldObject : object).?nodeName.orValue("")
+  validations:
+  - expression: >-
+      !variables.isKubeletPlugin || variables.hasNodeName
+    message: This user must have an "authentication.kubernetes.io/node-name" claim. ServiceAccountTokenNodeBindingValidation must be enabled in the cluster.
+  - expression: >-
+      !variables.isKubeletPlugin || !variables.hasNodeName ||
+      variables.objectNodeName == request.userInfo.extra["authentication.kubernetes.io/node-name"][0]
+    message: This DRA kubelet plugin may only modify resourceslices that belong to the node the pod is running on.
+    # This is useful for debugging. Can be dropped in a production deployment.
+    messageExpression: >-
+      "The DRA kubelet plugin on node " + request.userInfo.extra["authentication.kubernetes.io/node-name"][0] +
+      " may only modify resourceslices that belong to the node the pod is running on, not " +
+      (variables.objectNodeName == "" ? "a cluster-scoped slice" : variables.objectNodeName) + "."
+---
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingAdmissionPolicyBinding
+metadata:
+  name: resourceslices-policy-dra-kubelet-plugin
+spec:
+  policyName: resourceslices-policy-dra-kubelet-plugin
+  validationActions: [Deny]
+  # All ResourceSlices are matched.
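
Usage sketch (not part of the patch): the header comment in plugin-permissions.yaml requires real deployments to replace every "dra-kubelet-plugin" occurrence with a driver-specific name. A minimal Go sketch of that substitution, assuming a hypothetical namespace and prefix "dra-example-driver" and mirroring the ReplaceAll order used in Driver.SetUp (namespace placeholder first, then the shared prefix):

// specialize.go: print a driver-specific copy of plugin-permissions.yaml.
// The names "dra-example-driver" and the local file path are hypothetical.
package main

import (
	"fmt"
	"os"
	"strings"
)

func main() {
	manifest, err := os.ReadFile("plugin-permissions.yaml")
	if err != nil {
		panic(err)
	}
	content := string(manifest)
	// Replace the namespace placeholder before the shared "dra-kubelet-plugin"
	// prefix; otherwise the prefix replacement would also rewrite the longer
	// "dra-kubelet-plugin-namespace" string.
	content = strings.ReplaceAll(content, "dra-kubelet-plugin-namespace", "dra-example-driver")
	content = strings.ReplaceAll(content, "dra-kubelet-plugin", "dra-example-driver")
	// Print the specialized manifest, e.g. to pipe into "kubectl apply -f -".
	fmt.Print(content)
}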