dra e2e: demonstrate how to use RBAC + VAP for a kubelet plugin

In reality, the kubelet plugin of a DRA driver is meant to be deployed as a
daemonset with a service account that limits its
permissions. https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/#additional-metadata-in-pod-bound-tokens
ensures that the node name is bound to the pod, which then can be used
in a validating admission policy (VAP) to ensure that the operations are
limited to the node.

In E2E testing, we emulate that via impersonation. This ensures that the plugin
does not accidentally depend on additional permissions.
This commit is contained in:
Patrick Ohly 2024-06-06 09:40:06 +02:00
parent 348f94ab55
commit ee3205804b
3 changed files with 304 additions and 9 deletions

View File

@ -19,6 +19,7 @@ package dra
import (
"bytes"
"context"
_ "embed"
"errors"
"fmt"
"net"
@ -38,10 +39,19 @@ import (
v1 "k8s.io/api/core/v1"
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apiserver/pkg/authentication/serviceaccount"
"k8s.io/client-go/discovery/cached/memory"
resourceapiinformer "k8s.io/client-go/informers/resource/v1alpha2"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/restmapper"
"k8s.io/client-go/tools/cache"
"k8s.io/dynamic-resource-allocation/kubeletplugin"
"k8s.io/klog/v2"
@ -52,6 +62,7 @@ import (
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
"k8s.io/kubernetes/test/e2e/storage/drivers/proxy"
"k8s.io/kubernetes/test/e2e/storage/utils"
"sigs.k8s.io/yaml"
)
const (
@ -63,10 +74,14 @@ type Nodes struct {
NodeNames []string
}
//go:embed test-driver/deploy/example/plugin-permissions.yaml
var pluginPermissions string
// NewNodes selects nodes to run the test on.
func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
nodes := &Nodes{}
ginkgo.BeforeEach(func(ctx context.Context) {
ginkgo.By("selecting nodes")
// The kubelet plugin is harder. We deploy the builtin manifest
// after patching in the driver name and all nodes on which we
@ -166,15 +181,19 @@ type MethodInstance struct {
}
type Driver struct {
f *framework.Framework
ctx context.Context
cleanup []func() // executed first-in-first-out
wg sync.WaitGroup
f *framework.Framework
ctx context.Context
cleanup []func() // executed first-in-first-out
wg sync.WaitGroup
serviceAccountName string
NameSuffix string
Controller *app.ExampleController
Name string
Nodes map[string]*app.ExamplePlugin
// Nodes contains entries for each node selected for a test when the test runs.
// In addition, there is one entry for a fictional node.
Nodes map[string]KubeletPlugin
parameterMode parameterMode
parameterAPIGroup string
@ -189,6 +208,11 @@ type Driver struct {
callCounts map[MethodInstance]int64
}
type KubeletPlugin struct {
*app.ExamplePlugin
ClientSet kubernetes.Interface
}
type parameterMode string
const (
@ -199,7 +223,7 @@ const (
func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
ginkgo.By(fmt.Sprintf("deploying driver on nodes %v", nodes.NodeNames))
d.Nodes = map[string]*app.ExamplePlugin{}
d.Nodes = make(map[string]KubeletPlugin)
d.Name = d.f.UniqueName + d.NameSuffix + ".k8s.io"
resources.DriverName = d.Name
@ -250,6 +274,13 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
framework.Failf("unknown test driver parameter mode: %s", d.parameterMode)
}
// Create service account and corresponding RBAC rules.
d.serviceAccountName = "dra-kubelet-plugin-" + d.Name + "-service-account"
content := pluginPermissions
content = strings.ReplaceAll(content, "dra-kubelet-plugin-namespace", d.f.Namespace.Name)
content = strings.ReplaceAll(content, "dra-kubelet-plugin", "dra-kubelet-plugin-"+d.Name)
d.createFromYAML(ctx, []byte(content), d.f.Namespace.Name)
instanceKey := "app.kubernetes.io/instance"
rsName := ""
draAddr := path.Join(framework.TestContext.KubeletRootDir, "plugins", d.Name+".sock")
@ -262,6 +293,7 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
item.Spec.Replicas = &numNodes
item.Spec.Selector.MatchLabels[instanceKey] = d.Name
item.Spec.Template.Labels[instanceKey] = d.Name
item.Spec.Template.Spec.ServiceAccountName = d.serviceAccountName
item.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution[0].LabelSelector.MatchLabels[instanceKey] = d.Name
item.Spec.Template.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
@ -305,15 +337,31 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
framework.ExpectNoError(err, "list proxy pods")
gomega.Expect(numNodes).To(gomega.Equal(int32(len(pods.Items))), "number of proxy pods")
// Run registar and plugin for each of the pods.
// Run registrar and plugin for each of the pods.
for _, pod := range pods.Items {
// Need a local variable, not the loop variable, for the anonymous
// callback functions below.
pod := pod
nodename := pod.Spec.NodeName
// Authenticate the plugin so that it has the exact same
// permissions as the daemonset pod. This includes RBAC and a
// validating admission policy which limits writes to per-node
// ResourceSlices.
//
// We could retrieve
// /var/run/secrets/kubernetes.io/serviceaccount/token from
// each pod and use it. That would check that
// ServiceAccountTokenNodeBindingValidation works. But that's
// better covered by a test owned by SIG Auth (like the one in
// https://github.com/kubernetes/kubernetes/pull/124711).
//
// Here we merely use impersonation, which is faster.
driverClient := d.impersonateKubeletPlugin(&pod)
logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", pod.Spec.NodeName, "pod", klog.KObj(&pod))
loggerCtx := klog.NewContext(ctx, logger)
plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, d.f.ClientSet, nodename,
plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, driverClient, nodename,
app.FileOperations{
Create: func(name string, content []byte) error {
klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
@ -342,7 +390,7 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
// Depends on cancel being called first.
plugin.Stop()
})
d.Nodes[nodename] = plugin
d.Nodes[nodename] = KubeletPlugin{ExamplePlugin: plugin, ClientSet: driverClient}
}
// Wait for registration.
@ -359,6 +407,26 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
}).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "hosts where the plugin has not been registered yet")
}
func (d *Driver) impersonateKubeletPlugin(pod *v1.Pod) kubernetes.Interface {
ginkgo.GinkgoHelper()
driverUserInfo := (&serviceaccount.ServiceAccountInfo{
Name: d.serviceAccountName,
Namespace: pod.Namespace,
NodeName: pod.Spec.NodeName,
PodName: pod.Name,
PodUID: string(pod.UID),
}).UserInfo()
driverClientConfig := d.f.ClientConfig()
driverClientConfig.Impersonate = rest.ImpersonationConfig{
UserName: driverUserInfo.GetName(),
Groups: driverUserInfo.GetGroups(),
Extra: driverUserInfo.GetExtra(),
}
driverClient, err := kubernetes.NewForConfig(driverClientConfig)
framework.ExpectNoError(err, "create client for driver")
return driverClient
}
func (d *Driver) createFile(pod *v1.Pod, name string, content []byte) error {
buffer := bytes.NewBuffer(content)
// Writing the content can be slow. Better create a temporary file and
@ -375,6 +443,57 @@ func (d *Driver) removeFile(pod *v1.Pod, name string) error {
return d.podIO(pod).RemoveAll(name)
}
func (d *Driver) createFromYAML(ctx context.Context, content []byte, namespace string) {
// Not caching the discovery result isn't very efficient, but good enough.
discoveryCache := memory.NewMemCacheClient(d.f.ClientSet.Discovery())
restMapper := restmapper.NewDeferredDiscoveryRESTMapper(discoveryCache)
for _, content := range bytes.Split(content, []byte("---\n")) {
if len(content) == 0 {
continue
}
var obj *unstructured.Unstructured
framework.ExpectNoError(yaml.UnmarshalStrict(content, &obj), fmt.Sprintf("Full YAML:\n%s\n", string(content)))
gv, err := schema.ParseGroupVersion(obj.GetAPIVersion())
framework.ExpectNoError(err, fmt.Sprintf("extract group+version from object %q", klog.KObj(obj)))
gk := schema.GroupKind{Group: gv.Group, Kind: obj.GetKind()}
mapping, err := restMapper.RESTMapping(gk, gv.Version)
framework.ExpectNoError(err, fmt.Sprintf("map %q to resource", gk))
resourceClient := d.f.DynamicClient.Resource(mapping.Resource)
options := metav1.CreateOptions{
// If the YAML input is invalid, then we want the
// apiserver to tell us via an error. This can
// happen because decoding into an unstructured object
// doesn't validate.
FieldValidation: "Strict",
}
switch mapping.Scope.Name() {
case meta.RESTScopeNameRoot:
_, err = resourceClient.Create(ctx, obj, options)
case meta.RESTScopeNameNamespace:
if namespace == "" {
framework.Failf("need namespace for object type %s", gk)
}
_, err = resourceClient.Namespace(namespace).Create(ctx, obj, options)
}
framework.ExpectNoError(err, "create object")
ginkgo.DeferCleanup(func(ctx context.Context) {
del := resourceClient.Delete
if mapping.Scope.Name() == meta.RESTScopeNameNamespace {
del = resourceClient.Namespace(namespace).Delete
}
err := del(ctx, obj.GetName(), metav1.DeleteOptions{})
if !apierrors.IsNotFound(err) {
framework.ExpectNoError(err, fmt.Sprintf("deleting %s.%s %s", obj.GetKind(), obj.GetAPIVersion(), klog.KObj(obj)))
}
})
}
}
func (d *Driver) podIO(pod *v1.Pod) proxy.PodDirIO {
logger := klog.Background()
return proxy.PodDirIO{

View File

@ -29,6 +29,7 @@ import (
"github.com/onsi/gomega"
"github.com/onsi/gomega/gcustom"
"github.com/onsi/gomega/gstruct"
"github.com/onsi/gomega/types"
v1 "k8s.io/api/core/v1"
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
@ -891,6 +892,96 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
driver := NewDriver(f, nodes, perNode(1, nodes))
driver.parameterMode = parameterModeStructured
f.It("must apply per-node permission checks", func(ctx context.Context) {
// All of the operations use the client set of a kubelet plugin for
// a fictional node which both don't exist, so nothing interferes
// when we actually manage to create a slice.
fictionalNodeName := "dra-fictional-node"
gomega.Expect(nodes.NodeNames).NotTo(gomega.ContainElement(fictionalNodeName))
fictionalNodeClient := driver.impersonateKubeletPlugin(&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: fictionalNodeName + "-dra-plugin",
Namespace: f.Namespace.Name,
UID: "12345",
},
Spec: v1.PodSpec{
NodeName: fictionalNodeName,
},
})
// This is for some actual node in the cluster.
realNodeName := nodes.NodeNames[0]
realNodeClient := driver.Nodes[realNodeName].ClientSet
// This is the slice that we try to create. It needs to be deleted
// after testing, if it still exists at that time.
fictionalNodeSlice := &resourcev1alpha2.ResourceSlice{
ObjectMeta: metav1.ObjectMeta{
Name: fictionalNodeName + "-slice",
},
NodeName: fictionalNodeName,
DriverName: "dra.example.com",
ResourceModel: resourcev1alpha2.ResourceModel{
NamedResources: &resourcev1alpha2.NamedResourcesResources{},
},
}
ginkgo.DeferCleanup(func(ctx context.Context) {
err := f.ClientSet.ResourceV1alpha2().ResourceSlices().Delete(ctx, fictionalNodeSlice.Name, metav1.DeleteOptions{})
if !apierrors.IsNotFound(err) {
framework.ExpectNoError(err)
}
})
// Message from test-driver/deploy/example/plugin-permissions.yaml
matchVAPDeniedError := gomega.MatchError(gomega.ContainSubstring("may only modify resourceslices that belong to the node the pod is running on"))
mustCreate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice) *resourcev1alpha2.ResourceSlice {
ginkgo.GinkgoHelper()
slice, err := clientSet.ResourceV1alpha2().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
framework.ExpectNoError(err, fmt.Sprintf("CREATE: %s + %s", clientName, slice.Name))
return slice
}
mustUpdate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice) *resourcev1alpha2.ResourceSlice {
ginkgo.GinkgoHelper()
slice, err := clientSet.ResourceV1alpha2().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{})
framework.ExpectNoError(err, fmt.Sprintf("UPDATE: %s + %s", clientName, slice.Name))
return slice
}
mustDelete := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice) {
ginkgo.GinkgoHelper()
err := clientSet.ResourceV1alpha2().ResourceSlices().Delete(ctx, slice.Name, metav1.DeleteOptions{})
framework.ExpectNoError(err, fmt.Sprintf("DELETE: %s + %s", clientName, slice.Name))
}
mustFailToCreate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice, matchError types.GomegaMatcher) {
ginkgo.GinkgoHelper()
_, err := clientSet.ResourceV1alpha2().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
gomega.Expect(err).To(matchError, fmt.Sprintf("CREATE: %s + %s", clientName, slice.Name))
}
mustFailToUpdate := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice, matchError types.GomegaMatcher) {
ginkgo.GinkgoHelper()
_, err := clientSet.ResourceV1alpha2().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{})
gomega.Expect(err).To(matchError, fmt.Sprintf("UPDATE: %s + %s", clientName, slice.Name))
}
mustFailToDelete := func(clientSet kubernetes.Interface, clientName string, slice *resourcev1alpha2.ResourceSlice, matchError types.GomegaMatcher) {
ginkgo.GinkgoHelper()
err := clientSet.ResourceV1alpha2().ResourceSlices().Delete(ctx, slice.Name, metav1.DeleteOptions{})
gomega.Expect(err).To(matchError, fmt.Sprintf("DELETE: %s + %s", clientName, slice.Name))
}
// Create with different clients, keep it in the end.
mustFailToCreate(realNodeClient, "real plugin", fictionalNodeSlice, matchVAPDeniedError)
createdFictionalNodeSlice := mustCreate(fictionalNodeClient, "fictional plugin", fictionalNodeSlice)
// Update with different clients.
mustFailToUpdate(realNodeClient, "real plugin", createdFictionalNodeSlice, matchVAPDeniedError)
createdFictionalNodeSlice = mustUpdate(fictionalNodeClient, "fictional plugin", createdFictionalNodeSlice)
createdFictionalNodeSlice = mustUpdate(f.ClientSet, "admin", createdFictionalNodeSlice)
// Delete with different clients.
mustFailToDelete(realNodeClient, "real plugin", createdFictionalNodeSlice, matchVAPDeniedError)
mustDelete(fictionalNodeClient, "fictional plugin", createdFictionalNodeSlice)
})
f.It("must manage ResourceSlices", f.WithSlow(), func(ctx context.Context) {
driverName := driver.Name
@ -1100,6 +1191,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
b := newBuilder(f, driver)
preScheduledTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
claimTests(b, driver, resourcev1alpha2.AllocationModeImmediate)
})
})

View File

@ -0,0 +1,84 @@
# Real driver deployments must replace all occurrences of "dra-kubelet-plugin"
# with something specific to their driver.
apiVersion: v1
kind: ServiceAccount
metadata:
name: dra-kubelet-plugin-service-account
namespace: dra-kubelet-plugin-namespace
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: dra-kubelet-plugin-role
rules:
- apiGroups: ["resource.k8s.io"]
resources: ["resourceclaims"]
verbs: ["get"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get"]
- apiGroups: ["resource.k8s.io"]
resources: ["resourceslices"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: dra-kubelet-plugin-role-binding
subjects:
- kind: ServiceAccount
name: dra-kubelet-plugin-service-account
namespace: dra-kubelet-plugin-namespace
roleRef:
kind: ClusterRole
name: dra-kubelet-plugin-role
apiGroup: rbac.authorization.k8s.io
---
# This ValidatingAdmissionPolicy is specific to the DRA driver's kubelet plugin
# because it checks the ServiceAccount defined for it above. An admin could
# also define a single policy for all drivers.
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicy
metadata:
name: resourceslices-policy-dra-kubelet-plugin
spec:
failurePolicy: Fail
matchConstraints:
resourceRules:
- apiGroups: ["resource.k8s.io"]
apiVersions: ["v1alpha2"]
operations: ["CREATE", "UPDATE", "DELETE"]
resources: ["resourceslices"]
variables:
- name: hasNodeName
expression: >-
"authentication.kubernetes.io/node-name" in request.userInfo.extra
- name: isKubeletPlugin
expression: >-
request.userInfo.username == "system:serviceaccount:dra-kubelet-plugin-namespace:dra-kubelet-plugin-service-account"
- name: objectNodeName
expression: >-
(request.operation == "DELETE" ? oldObject : object).?nodeName.orValue("")
validations:
- expression: >-
!variables.isKubeletPlugin || variables.hasNodeName
message: This user must have a "authentication.kubernetes.io/node-name" claim. ServiceAccountTokenNodeBindingValidation must be enabled in the cluster.
- expression: >-
!variables.isKubeletPlugin || !variables.hasNodeName ||
variables.objectNodeName == request.userInfo.extra["authentication.kubernetes.io/node-name"][0]
message: This DRA kubelet plugin may only modify resourceslices that belong to the node the pod is running on.
# This is useful for debugging. Can be dropped in a production deployment.
messageExpression: >-
"The DRA kubelet plugin on node " + request.userInfo.extra["authentication.kubernetes.io/node-name"][0] +
" may only modify resourceslices that belong to the node the pod is running on, not " +
(variables.objectNodeName == "" ? variables.objectNodeName : "a cluster-scoped slice") + "."
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicyBinding
metadata:
name: resourceslices-policy-dra-kubelet-plugin
spec:
policyName: resourceslices-policy-dra-kubelet-plugin
validationActions: [Deny]
# All ResourceSlices are matched.