DRA e2e: adapt to v1alpha3 API

Patrick Ohly 2024-07-11 16:29:14 +02:00
parent 877829aeaa
commit 0b62bfb690
10 changed files with 1059 additions and 873 deletions


@@ -38,7 +38,6 @@ import (
     appsv1 "k8s.io/api/apps/v1"
     v1 "k8s.io/api/core/v1"
     resourceapi "k8s.io/api/resource/v1alpha3"
-    apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
     apierrors "k8s.io/apimachinery/pkg/api/errors"
     "k8s.io/apimachinery/pkg/api/meta"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -99,6 +98,7 @@ func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
         for _, node := range nodeList.Items {
             nodes.NodeNames = append(nodes.NodeNames, node.Name)
         }
+        sort.Strings(nodes.NodeNames)
         framework.Logf("testing on nodes %v", nodes.NodeNames)

         // Watch claims in the namespace. This is useful for monitoring a test
@@ -153,7 +153,7 @@ func validateClaim(claim *resourceapi.ResourceClaim) {
 // NewDriver sets up controller (as client of the cluster) and
 // kubelet plugin (via proxy) before the test runs. It cleans
 // up after the test.
-func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources) *Driver {
+func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources, devicesPerNode ...map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) *Driver {
     d := &Driver{
         f:    f,
         fail: map[MethodInstance]bool{},
@@ -169,7 +169,7 @@ func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() a
             resources.Nodes = nodes.NodeNames
         }
         ginkgo.DeferCleanup(d.IsGone) // Register first so it gets called last.
-        d.SetUp(nodes, resources)
+        d.SetUp(nodes, resources, devicesPerNode...)
         ginkgo.DeferCleanup(d.TearDown)
     })
     return d
@@ -195,13 +195,8 @@ type Driver struct {
     // In addition, there is one entry for a fictional node.
     Nodes map[string]KubeletPlugin

-    parameterMode         parameterMode
-    parameterAPIGroup     string
-    parameterAPIVersion   string
-    claimParameterAPIKind string
-    classParameterAPIKind string
-    NodeV1alpha3          bool
+    parameterMode parameterMode // empty == parameterModeStructured
+    NodeV1alpha3  bool

     mutex sync.Mutex
     fail  map[MethodInstance]bool
@@ -216,12 +211,11 @@ type KubeletPlugin struct {
 type parameterMode string

 const (
-    parameterModeConfigMap  parameterMode = "configmap"  // ConfigMap parameters, control plane controller.
-    parameterModeStructured parameterMode = "structured" // No ConfigMaps, directly create and reference in-tree parameter objects.
-    parameterModeTranslated parameterMode = "translated" // Reference ConfigMaps in claim and class, generate in-tree parameter objects.
+    parameterModeClassicDRA parameterMode = "classic"    // control plane controller
+    parameterModeStructured parameterMode = "structured" // allocation through scheduler
 )

-func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
+func (d *Driver) SetUp(nodes *Nodes, resources app.Resources, devicesPerNode ...map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) {
     ginkgo.By(fmt.Sprintf("deploying driver on nodes %v", nodes.NodeNames))
     d.Nodes = make(map[string]KubeletPlugin)
     d.Name = d.f.UniqueName + d.NameSuffix + ".k8s.io"
@@ -236,8 +230,12 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
     d.ctx = ctx
     d.cleanup = append(d.cleanup, cancel)

+    if d.parameterMode == "" {
+        d.parameterMode = parameterModeStructured
+    }
+
     switch d.parameterMode {
-    case "", parameterModeConfigMap:
+    case parameterModeClassicDRA:
         // The controller is easy: we simply connect to the API server.
         d.Controller = app.NewController(d.f.ClientSet, resources)
         d.wg.Add(1)
@@ -245,6 +243,49 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
             defer d.wg.Done()
             d.Controller.Run(d.ctx, 5 /* workers */)
         }()
+    case parameterModeStructured:
+        if !resources.NodeLocal {
+            // Publish one resource pool with "network-attached" devices.
+            slice := &resourceapi.ResourceSlice{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: d.Name, // globally unique
+                },
+                Spec: resourceapi.ResourceSliceSpec{
+                    Driver: d.Name,
+                    Pool: resourceapi.ResourcePool{
+                        Name:               "network",
+                        Generation:         1,
+                        ResourceSliceCount: 1,
+                    },
+                    NodeSelector: &v1.NodeSelector{
+                        NodeSelectorTerms: []v1.NodeSelectorTerm{{
+                            MatchFields: []v1.NodeSelectorRequirement{{
+                                Key:      "metadata.name",
+                                Operator: v1.NodeSelectorOpIn,
+                                Values:   nodes.NodeNames,
+                            }},
+                        }},
+                    },
+                },
+            }
+            maxAllocations := resources.MaxAllocations
+            if maxAllocations <= 0 {
+                // Cannot be empty, otherwise nothing runs.
+                maxAllocations = 10
+            }
+            for i := 0; i < maxAllocations; i++ {
+                slice.Spec.Devices = append(slice.Spec.Devices, resourceapi.Device{
+                    Name:  fmt.Sprintf("device-%d", i),
+                    Basic: &resourceapi.BasicDevice{},
+                })
+            }
+
+            _, err := d.f.ClientSet.ResourceV1alpha3().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
+            framework.ExpectNoError(err)
+            ginkgo.DeferCleanup(func(ctx context.Context) {
+                framework.ExpectNoError(d.f.ClientSet.ResourceV1alpha3().ResourceSlices().Delete(ctx, slice.Name, metav1.DeleteOptions{}))
+            })
+        }
     }

     manifests := []string{
@@ -252,24 +293,12 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
         // container names, etc.).
         "test/e2e/testing-manifests/dra/dra-test-driver-proxy.yaml",
     }
-    if d.parameterMode == "" {
-        d.parameterMode = parameterModeConfigMap
-    }
-    var numResourceInstances = -1 // disabled
-    if d.parameterMode != parameterModeConfigMap {
-        numResourceInstances = resources.MaxAllocations
+    var numDevices = -1 // disabled
+    if d.parameterMode != parameterModeClassicDRA && resources.NodeLocal {
+        numDevices = resources.MaxAllocations
     }
     switch d.parameterMode {
-    case parameterModeConfigMap, parameterModeTranslated:
-        d.parameterAPIGroup = ""
-        d.parameterAPIVersion = "v1"
-        d.claimParameterAPIKind = "ConfigMap"
-        d.classParameterAPIKind = "ConfigMap"
-    case parameterModeStructured:
-        d.parameterAPIGroup = "resource.k8s.io"
-        d.parameterAPIVersion = "v1alpha3"
-        d.claimParameterAPIKind = "ResourceClaimParameters"
-        d.classParameterAPIKind = "ResourceClassParameters"
+    case parameterModeClassicDRA, parameterModeStructured:
     default:
         framework.Failf("unknown test driver parameter mode: %s", d.parameterMode)
     }
@@ -314,10 +343,6 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
             item.Spec.Template.Spec.Volumes[2].HostPath.Path = path.Join(framework.TestContext.KubeletRootDir, "plugins_registry")
             item.Spec.Template.Spec.Containers[0].Args = append(item.Spec.Template.Spec.Containers[0].Args, "--endpoint=/plugins_registry/"+d.Name+"-reg.sock")
             item.Spec.Template.Spec.Containers[1].Args = append(item.Spec.Template.Spec.Containers[1].Args, "--endpoint=/dra/"+d.Name+".sock")
-        case *apiextensionsv1.CustomResourceDefinition:
-            item.Name = strings.ReplaceAll(item.Name, "dra.e2e.example.com", d.parameterAPIGroup)
-            item.Spec.Group = d.parameterAPIGroup
         }
         return nil
     }, manifests...)
@@ -336,9 +361,12 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
     pods, err := d.f.ClientSet.CoreV1().Pods(d.f.Namespace.Name).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
     framework.ExpectNoError(err, "list proxy pods")
     gomega.Expect(numNodes).To(gomega.Equal(int32(len(pods.Items))), "number of proxy pods")
+    sort.Slice(pods.Items, func(i, j int) bool {
+        return pods.Items[i].Spec.NodeName < pods.Items[j].Spec.NodeName
+    })

     // Run registrar and plugin for each of the pods.
-    for _, pod := range pods.Items {
+    for i, pod := range pods.Items {
         // Need a local variable, not the loop variable, for the anonymous
         // callback functions below.
         pod := pod
@@ -361,18 +389,23 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
         logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", pod.Spec.NodeName, "pod", klog.KObj(&pod))
         loggerCtx := klog.NewContext(ctx, logger)
-        plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, driverClient, nodename,
-            app.FileOperations{
-                Create: func(name string, content []byte) error {
-                    klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
-                    return d.createFile(&pod, name, content)
-                },
-                Remove: func(name string) error {
-                    klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
-                    return d.removeFile(&pod, name)
-                },
-                NumResourceInstances: numResourceInstances,
-            },
+        fileOps := app.FileOperations{
+            Create: func(name string, content []byte) error {
+                klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
+                return d.createFile(&pod, name, content)
+            },
+            Remove: func(name string) error {
+                klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
+                return d.removeFile(&pod, name)
+            },
+        }
+        if i < len(devicesPerNode) {
+            fileOps.Devices = devicesPerNode[i]
+            fileOps.NumDevices = -1
+        } else {
+            fileOps.NumDevices = numDevices
+        }
+        plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, driverClient, nodename, fileOps,
             kubeletplugin.GRPCVerbosity(0),
             kubeletplugin.GRPCInterceptor(func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
                 return d.interceptor(nodename, ctx, req, info, handler)
@@ -527,7 +560,7 @@ func (d *Driver) TearDown() {
 func (d *Driver) IsGone(ctx context.Context) {
     gomega.Eventually(ctx, func(ctx context.Context) ([]resourceapi.ResourceSlice, error) {
-        slices, err := d.f.ClientSet.ResourceV1alpha3().ResourceSlices().List(ctx, metav1.ListOptions{FieldSelector: "driverName=" + d.Name})
+        slices, err := d.f.ClientSet.ResourceV1alpha3().ResourceSlices().List(ctx, metav1.ListOptions{FieldSelector: resourceapi.ResourceSliceSelectorDriver + "=" + d.Name})
         if err != nil {
             return nil, err
         }

File diff suppressed because it is too large.


@@ -14,6 +14,7 @@ nodes:
   scheduler:
     extraArgs:
       v: "5"
+      vmodule: "allocator=6,dynamicresources=6" # structured/allocator.go, DRA scheduler plugin
   controllerManager:
     extraArgs:
       v: "5"


@@ -20,16 +20,13 @@ package app
 import (
     "context"
-    "encoding/json"
     "errors"
     "fmt"
-    "math/rand"
-    "strings"
+    "slices"
     "sync"

     v1 "k8s.io/api/core/v1"
     resourceapi "k8s.io/api/resource/v1alpha3"
-    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
     "k8s.io/apimachinery/pkg/types"
     "k8s.io/client-go/informers"
@@ -48,7 +45,9 @@ type Resources struct {
     Nodes []string
     // NodeLabels are labels which determine on which nodes resources are
     // available. Mutually exclusive with Nodes.
     NodeLabels labels.Set
+
+    // Number of devices called "device-000", "device-001", ... on each node or in the cluster.
     MaxAllocations int
     // AllocateWrapper, if set, gets called for each Allocate call.
@@ -68,12 +67,16 @@ func (r Resources) AllNodes(nodeLister listersv1.NodeLister) []string {
     return r.Nodes
 }

-func (r Resources) NewAllocation(node string, data []byte) *resourceapi.AllocationResult {
-    allocation := &resourceapi.AllocationResult{}
-    allocation.ResourceHandles = []resourceapi.ResourceHandle{
-        {
-            DriverName: r.DriverName,
-            Data:       string(data),
+func (r Resources) newAllocation(requestName, node string, config []resourceapi.DeviceAllocationConfiguration) *resourceapi.AllocationResult {
+    allocation := &resourceapi.AllocationResult{
+        Devices: resourceapi.DeviceAllocationResult{
+            Results: []resourceapi.DeviceRequestAllocationResult{{
+                Driver:  r.DriverName,
+                Pool:    "none",
+                Request: requestName,
+                Device:  "none",
+            }},
+            Config: config,
         },
     }
     if node == "" && len(r.NodeLabels) > 0 {
@@ -86,7 +89,7 @@ func (r Resources) NewAllocation(node string, data []byte) *resourceapi.Allocati
                 Values:   []string{value},
             })
         }
-        allocation.AvailableOnNodes = &v1.NodeSelector{
+        allocation.NodeSelector = &v1.NodeSelector{
             NodeSelectorTerms: []v1.NodeSelectorTerm{
                 {
                     MatchExpressions: requirements,
@@ -103,7 +106,7 @@ func (r Resources) NewAllocation(node string, data []byte) *resourceapi.Allocati
             nodes = r.Nodes
         }
         if len(nodes) > 0 {
-            allocation.AvailableOnNodes = &v1.NodeSelector{
+            allocation.NodeSelector = &v1.NodeSelector{
                 NodeSelectorTerms: []v1.NodeSelectorTerm{
                     {
                         MatchExpressions: []v1.NodeSelectorRequirement{
@@ -166,11 +169,6 @@ func (c *ExampleController) Run(ctx context.Context, workers int) {
     informerFactory.Shutdown()
 }

-type parameters struct {
-    EnvVars  map[string]string
-    NodeName string
-}
-
 var _ controller.Driver = &ExampleController{}

 // GetNumAllocations returns the number of times that a claim was allocated.
@@ -193,36 +191,6 @@ func (c *ExampleController) GetNumDeallocations() int64 {
     return c.numDeallocations
 }

-func (c *ExampleController) GetClassParameters(ctx context.Context, class *resourceapi.ResourceClass) (interface{}, error) {
-    if class.ParametersRef != nil {
-        if class.ParametersRef.APIGroup != "" ||
-            class.ParametersRef.Kind != "ConfigMap" {
-            return nil, fmt.Errorf("class parameters are only supported in APIVersion v1, Kind ConfigMap, got: %v", class.ParametersRef)
-        }
-        return c.readParametersFromConfigMap(ctx, class.ParametersRef.Namespace, class.ParametersRef.Name)
-    }
-    return nil, nil
-}
-
-func (c *ExampleController) GetClaimParameters(ctx context.Context, claim *resourceapi.ResourceClaim, class *resourceapi.ResourceClass, classParameters interface{}) (interface{}, error) {
-    if claim.Spec.ParametersRef != nil {
-        if claim.Spec.ParametersRef.APIGroup != "" ||
-            claim.Spec.ParametersRef.Kind != "ConfigMap" {
-            return nil, fmt.Errorf("claim parameters are only supported in APIVersion v1, Kind ConfigMap, got: %v", claim.Spec.ParametersRef)
-        }
-        return c.readParametersFromConfigMap(ctx, claim.Namespace, claim.Spec.ParametersRef.Name)
-    }
-    return nil, nil
-}
-
-func (c *ExampleController) readParametersFromConfigMap(ctx context.Context, namespace, name string) (map[string]string, error) {
-    configMap, err := c.clientset.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{})
-    if err != nil {
-        return nil, fmt.Errorf("get config map: %w", err)
-    }
-    return configMap.Data, nil
-}
-
 func (c *ExampleController) Allocate(ctx context.Context, claimAllocations []*controller.ClaimAllocation, selectedNode string) {
     if c.resources.AllocateWrapper != nil {
@@ -236,7 +204,7 @@ func (c *ExampleController) Allocate(ctx context.Context, claimAllocations []*co
 func (c *ExampleController) allocateOneByOne(ctx context.Context, claimAllocations []*controller.ClaimAllocation, selectedNode string) {
     for _, ca := range claimAllocations {
-        allocationResult, err := c.allocateOne(ctx, ca.Claim, ca.ClaimParameters, ca.Class, ca.ClassParameters, selectedNode)
+        allocationResult, err := c.allocateOne(ctx, ca.Claim, ca.DeviceClasses, selectedNode)
         if err != nil {
             ca.Error = err
             continue
@@ -246,12 +214,25 @@ func (c *ExampleController) allocateOneByOne(ctx context.Context, claimAllocatio
 }

 // allocate simply copies parameters as JSON map into a ResourceHandle.
-func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.ResourceClaim, claimParameters interface{}, class *resourceapi.ResourceClass, classParameters interface{}, selectedNode string) (result *resourceapi.AllocationResult, err error) {
+func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.ResourceClaim, deviceClasses map[string]*resourceapi.DeviceClass, selectedNode string) (result *resourceapi.AllocationResult, err error) {
     logger := klog.LoggerWithValues(klog.LoggerWithName(klog.FromContext(ctx), "Allocate"), "claim", klog.KObj(claim), "uid", claim.UID)
     defer func() {
         logger.V(3).Info("done", "result", result, "err", err)
     }()

+    if len(claim.Spec.Devices.Requests) != 1 ||
+        claim.Spec.Devices.Requests[0].DeviceClassName == "" ||
+        claim.Spec.Devices.Requests[0].AllocationMode != resourceapi.DeviceAllocationModeExactCount ||
+        claim.Spec.Devices.Requests[0].Count != 1 {
+        return nil, errors.New("only claims requesting exactly one device are supported")
+    }
+    request := claim.Spec.Devices.Requests[0]
+    class := deviceClasses[request.DeviceClassName]
+    if len(request.Selectors) > 0 ||
+        class != nil && len(class.Spec.Selectors) > 0 {
+        return nil, errors.New("device selectors are not supported")
+    }
+
     c.mutex.Lock()
     defer c.mutex.Unlock()
@@ -267,24 +248,7 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.
     nodes := c.resources.AllNodes(c.nodeLister)
     if c.resources.NodeLocal {
         node = selectedNode
-        if node == "" {
-            // If none has been selected because we do immediate allocation,
-            // then we need to pick one ourselves.
-            var viableNodes []string
-            for _, n := range nodes {
-                if c.resources.MaxAllocations == 0 ||
-                    c.claimsPerNode[n] < c.resources.MaxAllocations {
-                    viableNodes = append(viableNodes, n)
-                }
-            }
-            if len(viableNodes) == 0 {
-                return nil, errors.New("resources exhausted on all nodes")
-            }
-            // Pick randomly. We could also prefer the one with the least
-            // number of allocations (even spreading) or the most (packing).
-            node = viableNodes[rand.Intn(len(viableNodes))]
-            logger.V(3).Info("picked a node ourselves", "selectedNode", selectedNode)
-        } else if !contains(nodes, node) ||
+        if !slices.Contains(nodes, node) ||
             c.resources.MaxAllocations > 0 &&
                 c.claimsPerNode[node] >= c.resources.MaxAllocations {
             return nil, fmt.Errorf("resources exhausted on node %q", node)
@@ -297,17 +261,47 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.
         }
     }
-    p := parameters{
-        EnvVars:  make(map[string]string),
-        NodeName: node,
-    }
-    toEnvVars("user", claimParameters, p.EnvVars)
-    toEnvVars("admin", classParameters, p.EnvVars)
-    data, err := json.Marshal(p)
-    if err != nil {
-        return nil, fmt.Errorf("encode parameters: %w", err)
-    }
-    allocation := c.resources.NewAllocation(node, data)
+    var configs []resourceapi.DeviceAllocationConfiguration
+    for i, config := range claim.Spec.Devices.Config {
+        if len(config.Requests) != 0 &&
+            !slices.Contains(config.Requests, request.Name) {
+            // Does not apply to request.
+            continue
+        }
+        if config.Opaque == nil {
+            return nil, fmt.Errorf("claim config #%d: only opaque configuration supported", i)
+        }
+        if config.Opaque.Driver != c.resources.DriverName {
+            // Does not apply to driver.
+            continue
+        }
+        // A normal driver would validate the config here. The test
+        // driver just passes it through.
+        configs = append(configs,
+            resourceapi.DeviceAllocationConfiguration{
+                Source:              resourceapi.AllocationConfigSourceClaim,
+                DeviceConfiguration: config.DeviceConfiguration,
+            },
+        )
+    }
+    if class != nil {
+        for i, config := range class.Spec.Config {
+            if config.Opaque == nil {
+                return nil, fmt.Errorf("class config #%d: only opaque configuration supported", i)
+            }
+            if config.Opaque.Driver != c.resources.DriverName {
+                // Does not apply to driver.
+                continue
+            }
+            configs = append(configs,
+                resourceapi.DeviceAllocationConfiguration{
+                    Source:              resourceapi.AllocationConfigSourceClass,
+                    DeviceConfiguration: config.DeviceConfiguration,
+                },
+            )
+        }
+    }
+    allocation := c.resources.newAllocation(request.Name, node, configs)
     if !alreadyAllocated {
         c.numAllocations++
         c.allocated[claim.UID] = node
@@ -359,7 +353,7 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
                 // can only work if a node has capacity left
                 // for all of them. Also, nodes that the driver
                 // doesn't run on cannot be used.
-                if !contains(nodes, node) ||
+                if !slices.Contains(nodes, node) ||
                     c.claimsPerNode[node]+len(claims) > c.resources.MaxAllocations {
                     claim.UnsuitableNodes = append(claim.UnsuitableNodes, node)
                 }
@@ -372,7 +366,7 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
     for _, claim := range claims {
         claim.UnsuitableNodes = nil
         for _, node := range potentialNodes {
-            if !contains(nodes, node) ||
+            if !slices.Contains(nodes, node) ||
                 allocations+len(claims) > c.resources.MaxAllocations {
                 claim.UnsuitableNodes = append(claim.UnsuitableNodes, node)
             }
@@ -381,24 +375,3 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
     return nil
 }
-
-func toEnvVars(what string, from interface{}, to map[string]string) {
-    if from == nil {
-        return
-    }
-    env := from.(map[string]string)
-    for key, value := range env {
-        to[what+"_"+strings.ToLower(key)] = value
-    }
-}
-
-func contains[T comparable](list []T, value T) bool {
-    for _, v := range list {
-        if v == value {
-            return true
-        }
-    }
-    return false
-}


@@ -23,6 +23,9 @@ import (
     "fmt"
     "os"
     "path/filepath"
+    "regexp"
+    "slices"
+    "sort"
     "strings"
     "sync"
@@ -46,15 +49,14 @@ type ExamplePlugin struct {
     d          kubeletplugin.DRAPlugin
     fileOps    FileOperations
     cdiDir     string
     driverName string
     nodeName   string
-    instances  sets.Set[string]
+    deviceNames sets.Set[string]

     mutex sync.Mutex
-    instancesInUse sets.Set[string]
-    prepared       map[ClaimID][]string // instance names
-    gRPCCalls      []GRPCCall
+    prepared  map[ClaimID][]Device // prepared claims -> result of nodePrepareResource
+    gRPCCalls []GRPCCall

     blockPrepareResourcesMutex   sync.Mutex
     blockUnprepareResourcesMutex sync.Mutex
@@ -88,11 +90,18 @@ type ClaimID struct {
     UID  string
 }

+type Device struct {
+    PoolName    string
+    DeviceName  string
+    RequestName string
+    CDIDeviceID string
+}
+
 var _ drapb.NodeServer = &ExamplePlugin{}

 // getJSONFilePath returns the absolute path where CDI file is/should be.
-func (ex *ExamplePlugin) getJSONFilePath(claimUID string) string {
-    return filepath.Join(ex.cdiDir, fmt.Sprintf("%s-%s.json", ex.driverName, claimUID))
+func (ex *ExamplePlugin) getJSONFilePath(claimUID string, requestName string) string {
+    return filepath.Join(ex.cdiDir, fmt.Sprintf("%s-%s-%s.json", ex.driverName, claimUID, requestName))
 }

 // FileOperations defines optional callbacks for handling CDI files
@@ -105,10 +114,13 @@ type FileOperations struct {
     // file does not exist.
     Remove func(name string) error

-    // NumResourceInstances determines whether the plugin reports resources
-    // instances and how many. A negative value causes it to report "not implemented"
-    // in the NodeListAndWatchResources gRPC call.
-    NumResourceInstances int
+    // NumDevices determines whether the plugin reports devices
+    // and how many. It reports nothing if negative.
+    NumDevices int
+
+    // Pre-defined devices, with each device name mapped to
+    // the device attributes. Not used if NumDevices >= 0.
+    Devices map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute
 }

 // StartPlugin sets up the servers that are necessary for a DRA kubelet plugin.
@@ -129,22 +141,23 @@ func StartPlugin(ctx context.Context, cdiDir, driverName string, kubeClient kube
         }
     }
     ex := &ExamplePlugin{
         stopCh:     ctx.Done(),
         logger:     logger,
         kubeClient: kubeClient,
         fileOps:    fileOps,
         cdiDir:     cdiDir,
         driverName: driverName,
         nodeName:   nodeName,
-        instances:      sets.New[string](),
-        instancesInUse: sets.New[string](),
-        prepared:       make(map[ClaimID][]string),
+        prepared:    make(map[ClaimID][]Device),
+        deviceNames: sets.New[string](),
     }
-    for i := 0; i < ex.fileOps.NumResourceInstances; i++ {
-        ex.instances.Insert(fmt.Sprintf("instance-%02d", i))
+    for i := 0; i < ex.fileOps.NumDevices; i++ {
+        ex.deviceNames.Insert(fmt.Sprintf("device-%02d", i))
+    }
+    for deviceName := range ex.fileOps.Devices {
+        ex.deviceNames.Insert(deviceName)
     }
+
     opts = append(opts,
         kubeletplugin.DriverName(driverName),
         kubeletplugin.NodeName(nodeName),
@@ -158,19 +171,30 @@ func StartPlugin(ctx context.Context, cdiDir, driverName string, kubeClient kube
     }
     ex.d = d

-    if fileOps.NumResourceInstances >= 0 {
-        instances := make([]resourceapi.NamedResourcesInstance, ex.fileOps.NumResourceInstances)
-        for i := 0; i < ex.fileOps.NumResourceInstances; i++ {
-            instances[i].Name = fmt.Sprintf("instance-%02d", i)
+    if fileOps.NumDevices >= 0 {
+        devices := make([]resourceapi.Device, ex.fileOps.NumDevices)
+        for i := 0; i < ex.fileOps.NumDevices; i++ {
+            devices[i] = resourceapi.Device{
+                Name:  fmt.Sprintf("device-%02d", i),
+                Basic: &resourceapi.BasicDevice{},
+            }
         }
-        nodeResources := []*resourceapi.ResourceModel{
-            {
-                NamedResources: &resourceapi.NamedResourcesResources{
-                    Instances: instances,
-                },
-            },
+        resources := kubeletplugin.Resources{
+            Devices: devices,
         }
-        ex.d.PublishResources(ctx, nodeResources)
+        ex.d.PublishResources(ctx, resources)
+    } else if len(ex.fileOps.Devices) > 0 {
+        devices := make([]resourceapi.Device, len(ex.fileOps.Devices))
+        for i, deviceName := range sets.List(ex.deviceNames) {
+            devices[i] = resourceapi.Device{
+                Name:  deviceName,
+                Basic: &resourceapi.BasicDevice{Attributes: ex.fileOps.Devices[deviceName]},
+            }
+        }
+        resources := kubeletplugin.Resources{
+            Devices: devices,
+        }
+        ex.d.PublishResources(ctx, resources)
     }

     return ex, nil
@@ -245,17 +269,15 @@ func (ex *ExamplePlugin) getUnprepareResourcesFailure() error {
     return ex.unprepareResourcesFailure
 }

-// NodePrepareResource ensures that the CDI file for the claim exists. It uses
+// NodePrepareResource ensures that the CDI file(s) (one per request) for the claim exists. It uses
 // a deterministic name to simplify NodeUnprepareResource (no need to remember
 // or discover the name) and idempotency (when called again, the file simply
 // gets written again).
-func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimReq *drapb.Claim) ([]string, error) {
+func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimReq *drapb.Claim) ([]Device, error) {
     logger := klog.FromContext(ctx)

     // The plugin must retrieve the claim itself to get it in the version
     // that it understands.
-    var resourceHandle string
-    var structuredResourceHandle *resourceapi.StructuredResourceHandle
     claim, err := ex.kubeClient.ResourceV1alpha3().ResourceClaims(claimReq.Namespace).Get(ctx, claimReq.Name, metav1.GetOptions{})
     if err != nil {
         return nil, fmt.Errorf("retrieve claim %s/%s: %w", claimReq.Namespace, claimReq.Name, err)
@@ -263,127 +285,113 @@ func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimReq *drap
     if claim.Status.Allocation == nil {
         return nil, fmt.Errorf("claim %s/%s not allocated", claimReq.Namespace, claimReq.Name)
     }
-    if claim.UID != types.UID(claimReq.Uid) {
+    if claim.UID != types.UID(claimReq.UID) {
         return nil, fmt.Errorf("claim %s/%s got replaced", claimReq.Namespace, claimReq.Name)
     }
-    haveResources := false
-    for _, handle := range claim.Status.Allocation.ResourceHandles {
-        if handle.DriverName == ex.driverName {
-            haveResources = true
-            resourceHandle = handle.Data
-            structuredResourceHandle = handle.StructuredData
-            break
-        }
-    }
-    if !haveResources {
-        // Nothing to do.
-        return nil, nil
-    }

     ex.mutex.Lock()
     defer ex.mutex.Unlock()
     ex.blockPrepareResourcesMutex.Lock()
     defer ex.blockPrepareResourcesMutex.Unlock()

-    deviceName := "claim-" + claimReq.Uid
-    vendor := ex.driverName
-    class := "test"
-    dev := vendor + "/" + class + "=" + deviceName
-    claimID := ClaimID{Name: claimReq.Name, UID: claimReq.Uid}
-    if _, ok := ex.prepared[claimID]; ok {
+    claimID := ClaimID{Name: claimReq.Name, UID: claimReq.UID}
+    if result, ok := ex.prepared[claimID]; ok {
         // Idempotent call, nothing to do.
-        return []string{dev}, nil
+        return result, nil
     }

-    // Determine environment variables.
-    var p parameters
-    var instanceNames []string
-    if structuredResourceHandle == nil {
-        // Control plane controller did the allocation.
-        if err := json.Unmarshal([]byte(resourceHandle), &p); err != nil {
-            return nil, fmt.Errorf("unmarshal resource handle: %w", err)
-        }
-    } else {
-        // Scheduler did the allocation with structured parameters.
-        p.NodeName = structuredResourceHandle.NodeName
-        if err := extractParameters(structuredResourceHandle.VendorClassParameters, &p.EnvVars, "admin"); err != nil {
-            return nil, err
-        }
-        if err := extractParameters(structuredResourceHandle.VendorClaimParameters, &p.EnvVars, "user"); err != nil {
-            return nil, err
-        }
-        for _, result := range structuredResourceHandle.Results {
-            if err := extractParameters(result.VendorRequestParameters, &p.EnvVars, "user"); err != nil {
-                return nil, err
-            }
-            namedResources := result.NamedResources
-            if namedResources == nil {
-                return nil, errors.New("missing named resources allocation result")
-            }
-            instanceName := namedResources.Name
-            if instanceName == "" {
-                return nil, errors.New("empty named resources instance name")
-            }
-            if !ex.instances.Has(instanceName) {
-                return nil, fmt.Errorf("unknown allocated instance %q", instanceName)
-            }
-            if ex.instancesInUse.Has(instanceName) {
-                return nil, fmt.Errorf("resource instance %q used more than once", instanceName)
-            }
-            instanceNames = append(instanceNames, instanceName)
-        }
-    }
-
-    // Sanity check scheduling.
-    if p.NodeName != "" && ex.nodeName != "" && p.NodeName != ex.nodeName {
-        return nil, fmt.Errorf("claim was allocated for %q, cannot be prepared on %q", p.NodeName, ex.nodeName)
-    }
-
-    // CDI wants env variables as set of strings.
-    envs := []string{}
-    for key, val := range p.EnvVars {
-        envs = append(envs, key+"="+val)
-    }
-
-    spec := &spec{
-        Version: "0.3.0", // This has to be a version accepted by the runtimes.
-        Kind:    vendor + "/" + class,
-        // At least one device is required and its entry must have more
-        // than just the name.
-        Devices: []device{
-            {
-                Name: deviceName,
-                ContainerEdits: containerEdits{
-                    Env: envs,
-                },
-            },
-        },
-    }
-    filePath := ex.getJSONFilePath(claimReq.Uid)
-    buffer, err := json.Marshal(spec)
-    if err != nil {
-        return nil, fmt.Errorf("marshal spec: %w", err)
-    }
-    if err := ex.fileOps.Create(filePath, buffer); err != nil {
-        return nil, fmt.Errorf("failed to write CDI file %v", err)
-    }
-
-    ex.prepared[claimID] = instanceNames
-    for _, instanceName := range instanceNames {
-        ex.instancesInUse.Insert(instanceName)
-    }
-    logger.V(3).Info("CDI file created", "path", filePath, "device", dev)
-    return []string{dev}, nil
+    var devices []Device
+    for _, result := range claim.Status.Allocation.Devices.Results {
+        requestName := result.Request
+
+        // The driver joins all env variables in the order in which
+        // they appear in results (last one wins).
+        env := make(map[string]string)
+        for i, config := range claim.Status.Allocation.Devices.Config {
+            if config.Opaque == nil ||
+                config.Opaque.Driver != ex.driverName ||
+                len(config.Requests) > 0 && !slices.Contains(config.Requests, requestName) {
+                continue
+            }
+            if err := extractParameters(config.Opaque.Parameters, &env, config.Source == resourceapi.AllocationConfigSourceClass); err != nil {
+                return nil, fmt.Errorf("parameters in config #%d: %w", i, err)
+            }
+        }
+
+        // It also sets a claim_<claim name>_<request name>=true env variable.
+        // This can be used to identify which devices where mapped into a container.
+        claimReqName := "claim_" + claim.Name + "_" + requestName
+        claimReqName = regexp.MustCompile(`[^a-zA-Z0-9]`).ReplaceAllString(claimReqName, "_")
+        env[claimReqName] = "true"
+
+        deviceName := "claim-" + claimReq.UID + "-" + requestName
+        vendor := ex.driverName
+        class := "test"
+        cdiDeviceID := vendor + "/" + class + "=" + deviceName
+
+        // CDI wants env variables as set of strings.
+        envs := []string{}
+        for key, val := range env {
+            envs = append(envs, key+"="+val)
+        }
+        sort.Strings(envs)
+
+        if len(envs) == 0 {
+            // CDI does not support empty ContainerEdits. For example,
+            // kubelet+crio then fail with:
+            //    CDI device injection failed: unresolvable CDI devices ...
+            //
+            // Inject nothing instead, which is supported by DRA.
+            continue
+        }
+
+        spec := &spec{
+            Version: "0.3.0", // This has to be a version accepted by the runtimes.
+            Kind:    vendor + "/" + class,
+            // At least one device is required and its entry must have more
+            // than just the name.
+            Devices: []device{
+                {
+                    Name: deviceName,
+                    ContainerEdits: containerEdits{
+                        Env: envs,
+                    },
+                },
+            },
+        }
+        filePath := ex.getJSONFilePath(claimReq.UID, requestName)
+        buffer, err := json.Marshal(spec)
+        if err != nil {
+            return nil, fmt.Errorf("marshal spec: %w", err)
+        }
+        if err := ex.fileOps.Create(filePath, buffer); err != nil {
+            return nil, fmt.Errorf("failed to write CDI file: %w", err)
+        }
+        device := Device{
+            PoolName:    result.Pool,
+            DeviceName:  result.Device,
+            RequestName: requestName,
+            CDIDeviceID: cdiDeviceID,
+        }
+        devices = append(devices, device)
+    }
+    logger.V(3).Info("CDI file(s) created", "devices", devices)
+    ex.prepared[claimID] = devices
+    return devices, nil
 }

-func extractParameters(parameters runtime.RawExtension, env *map[string]string, kind string) error {
+func extractParameters(parameters runtime.RawExtension, env *map[string]string, admin bool) error {
     if len(parameters.Raw) == 0 {
         return nil
     }
+    kind := "user"
+    if admin {
+        kind = "admin"
+    }
     var data map[string]string
     if err := json.Unmarshal(parameters.Raw, &data); err != nil {
-        return fmt.Errorf("decoding %s parameters: %v", kind, err)
+        return fmt.Errorf("decoding %s parameters: %w", kind, err)
     }
     if len(data) > 0 && *env == nil {
         *env = make(map[string]string)
@@ -404,15 +412,23 @@ func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapb.No
     }

     for _, claimReq := range req.Claims {
-        cdiDevices, err := ex.nodePrepareResource(ctx, claimReq)
+        devices, err := ex.nodePrepareResource(ctx, claimReq)
         if err != nil {
-            resp.Claims[claimReq.Uid] = &drapb.NodePrepareResourceResponse{
+            resp.Claims[claimReq.UID] = &drapb.NodePrepareResourceResponse{
                 Error: err.Error(),
             }
         } else {
-            resp.Claims[claimReq.Uid] = &drapb.NodePrepareResourceResponse{
-                CDIDevices: cdiDevices,
+            r := &drapb.NodePrepareResourceResponse{}
+            for _, device := range devices {
+                pbDevice := &drapb.Device{
+                    PoolName:     device.PoolName,
+                    DeviceName:   device.DeviceName,
+                    RequestNames: []string{device.RequestName},
+                    CDIDeviceIDs: []string{device.CDIDeviceID},
+                }
+                r.Devices = append(r.Devices, pbDevice)
             }
+            resp.Claims[claimReq.UID] = r
         }
     }
     return resp, nil
@@ -427,27 +443,23 @@ func (ex *ExamplePlugin) nodeUnprepareResource(ctx context.Context, claimReq *dr
     logger := klog.FromContext(ctx)

-    filePath := ex.getJSONFilePath(claimReq.Uid)
-    if err := ex.fileOps.Remove(filePath); err != nil {
-        return fmt.Errorf("error removing CDI file: %w", err)
-    }
-    logger.V(3).Info("CDI file removed", "path", filePath)
-
-    ex.mutex.Lock()
-    defer ex.mutex.Unlock()
-
-    claimID := ClaimID{Name: claimReq.Name, UID: claimReq.Uid}
-    instanceNames, ok := ex.prepared[claimID]
+    claimID := ClaimID{Name: claimReq.Name, UID: claimReq.UID}
+    devices, ok := ex.prepared[claimID]
     if !ok {
         // Idempotent call, nothing to do.
         return nil
     }
-    delete(ex.prepared, claimID)
-    for _, instanceName := range instanceNames {
-        ex.instancesInUse.Delete(instanceName)
+    for _, device := range devices {
+        filePath := ex.getJSONFilePath(claimReq.UID, device.RequestName)
+        if err := ex.fileOps.Remove(filePath); err != nil {
+            return fmt.Errorf("error removing CDI file: %w", err)
+        }
+        logger.V(3).Info("CDI file removed", "path", filePath)
     }
+    delete(ex.prepared, claimID)

     return nil
 }
@@ -463,11 +475,11 @@ func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapb.
     for _, claimReq := range req.Claims {
         err := ex.nodeUnprepareResource(ctx, claimReq)
         if err != nil {
-            resp.Claims[claimReq.Uid] = &drapb.NodeUnprepareResourceResponse{
+            resp.Claims[claimReq.UID] = &drapb.NodeUnprepareResourceResponse{
                 Error: err.Error(),
             }
         } else {
-            resp.Claims[claimReq.Uid] = &drapb.NodeUnprepareResourceResponse{}
+            resp.Claims[claimReq.UID] = &drapb.NodeUnprepareResourceResponse{}
         }
     }
     return resp, nil


@@ -0,0 +1,8 @@
+apiVersion: resource.k8s.io/v1alpha3
+kind: ResourceClass
+metadata:
+  name: example
+spec:
+  selectors:
+  - cel:
+      expression: device.driver == "test-driver.cdi.k8s.io"


@@ -47,7 +47,7 @@ spec:
   matchConstraints:
     resourceRules:
     - apiGroups: ["resource.k8s.io"]
-      apiVersions: ["v1alpha2"]
+      apiVersions: ["v1alpha3"]
       operations: ["CREATE", "UPDATE", "DELETE"]
       resources: ["resourceslices"]
   variables:
@@ -59,7 +59,7 @@ spec:
        request.userInfo.username == "system:serviceaccount:dra-kubelet-plugin-namespace:dra-kubelet-plugin-service-account"
   - name: objectNodeName
     expression: >-
-      (request.operation == "DELETE" ? oldObject : object).?nodeName.orValue("")
+      (request.operation == "DELETE" ? oldObject : object).spec.?nodeName.orValue("")
   validations:
   - expression: >-
       !variables.isKubeletPlugin || variables.hasNodeName


@@ -1,18 +1,10 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: example-claim-parameters
-  namespace: default
-data:
-  a: b
----
 apiVersion: resource.k8s.io/v1alpha3
 kind: ResourceClaim
 metadata:
   name: example
   namespace: default
 spec:
-  resourceClassName: example
-  parametersRef:
-    kind: ConfigMap
-    name: example-claim-parameters
+  devices:
+    requests:
+    - name: req-0
+      deviceClassName: example


@ -1,7 +0,0 @@
apiVersion: resource.k8s.io/v1alpha3
kind: ResourceClass
metadata:
name: example
driverName: test-driver.cdi.k8s.io
# TODO:
# parameters


@@ -30,17 +30,20 @@ import (
     "os"
     "path"
     "path/filepath"
+    "regexp"
+    "sort"
+    "strings"
     "time"

     "github.com/onsi/ginkgo/v2"
     "github.com/onsi/gomega"
-    "github.com/onsi/gomega/gstruct"
     "github.com/onsi/gomega/types"

     v1 "k8s.io/api/core/v1"
     resourceapi "k8s.io/api/resource/v1alpha3"
     apierrors "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/runtime"
     "k8s.io/client-go/kubernetes"
     "k8s.io/klog/v2"
     draplugin "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
@@ -417,10 +420,9 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
         })

         ginkgo.It("must run pod if NodePrepareResources is in progress for one plugin when Kubelet restarts", func(ctx context.Context) {
-            _, kubeletPlugin2 := start(ctx)
-            kubeletPlugin := newKubeletPlugin(ctx, f.ClientSet, getNodeName(ctx, f), driverName)
+            kubeletPlugin1, kubeletPlugin2 := start(ctx)

-            unblock := kubeletPlugin.BlockNodePrepareResources()
+            unblock := kubeletPlugin1.BlockNodePrepareResources()
             pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod", true, []string{kubeletPlugin1Name, kubeletPlugin2Name})

             ginkgo.By("wait for pod to be in Pending state")
@@ -478,9 +480,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
     }

     matchResourcesByNodeName := func(nodeName string) types.GomegaMatcher {
-        return gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
-            "NodeName": gomega.Equal(nodeName),
-        })
+        return gomega.HaveField("Spec.NodeName", gomega.Equal(nodeName))
     }

     f.It("must be removed on kubelet startup", f.WithDisruptive(), func(ctx context.Context) {
@@ -562,7 +562,7 @@ func newKubeletPlugin(ctx context.Context, clientSet kubernetes.Interface, nodeN
     ginkgo.DeferCleanup(func(ctx context.Context) {
         // kubelet should do this eventually, but better make sure.
        // A separate test checks this explicitly.
-        framework.ExpectNoError(clientSet.ResourceV1alpha3().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: "driverName=" + driverName}))
+        framework.ExpectNoError(clientSet.ResourceV1alpha3().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: resourceapi.ResourceSliceSelectorDriver + "=" + driverName}))
     })

     ginkgo.DeferCleanup(plugin.Stop)
@@ -573,18 +573,17 @@
 // NOTE: as scheduler and controller manager are not running by the Node e2e,
 // the objects must contain all required data to be processed correctly by the API server
 // and placed on the node without involving the scheduler and the DRA controller
-func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string, deferPodDeletion bool, pluginNames []string) *v1.Pod {
-    // ResourceClass
-    class := &resourceapi.ResourceClass{
+func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string, deferPodDeletion bool, driverNames []string) *v1.Pod {
+    // DeviceClass
+    class := &resourceapi.DeviceClass{
         ObjectMeta: metav1.ObjectMeta{
             Name: className,
         },
-        DriverName: "controller",
     }
-    _, err := clientSet.ResourceV1alpha3().ResourceClasses().Create(ctx, class, metav1.CreateOptions{})
+    _, err := clientSet.ResourceV1alpha3().DeviceClasses().Create(ctx, class, metav1.CreateOptions{})
     framework.ExpectNoError(err)
-    ginkgo.DeferCleanup(clientSet.ResourceV1alpha3().ResourceClasses().Delete, className, metav1.DeleteOptions{})
+    ginkgo.DeferCleanup(clientSet.ResourceV1alpha3().DeviceClasses().Delete, className, metav1.DeleteOptions{})

     // ResourceClaim
     podClaimName := "resource-claim"
@@ -593,7 +592,12 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
             Name: claimName,
         },
         Spec: resourceapi.ResourceClaimSpec{
-            ResourceClassName: className,
+            Devices: resourceapi.DeviceClaim{
+                Requests: []resourceapi.DeviceRequest{{
+                    Name:            "my-request",
+                    DeviceClassName: className,
+                }},
+            },
         },
     }
     createdClaim, err := clientSet.ResourceV1alpha3().ResourceClaims(namespace).Create(ctx, claim, metav1.CreateOptions{})
@@ -601,7 +605,18 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
     ginkgo.DeferCleanup(clientSet.ResourceV1alpha3().ResourceClaims(namespace).Delete, claimName, metav1.DeleteOptions{})

-    // Pod
+    // The pod checks its own env with grep. Each driver injects its own parameters,
+    // with the driver name as part of the variable name. Sorting ensures that a
+    // single grep can match the output of env when that gets turned into a single
+    // line because the order is deterministic.
+    nameToEnv := func(driverName string) string {
+        return "DRA_" + regexp.MustCompile(`[^a-z0-9]`).ReplaceAllString(driverName, "_")
+    }
+    var expectedEnv []string
+    sort.Strings(driverNames)
+    for _, driverName := range driverNames {
+        expectedEnv = append(expectedEnv, nameToEnv(driverName)+"=PARAM1_VALUE")
+    }
     containerName := "testcontainer"
     pod := &v1.Pod{
         ObjectMeta: metav1.ObjectMeta{
@@ -623,7 +638,9 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
                 Resources: v1.ResourceRequirements{
                     Claims: []v1.ResourceClaim{{Name: podClaimName}},
                 },
-                Command: []string{"/bin/sh", "-c", "env | grep DRA_PARAM1=PARAM1_VALUE"},
+                // If injecting env variables fails, the pod fails and this error shows up in
+                // ... Terminated:&ContainerStateTerminated{ExitCode:1,Signal:0,Reason:Error,Message:ERROR: ...
+                Command: []string{"/bin/sh", "-c", "if ! echo $(env) | grep -q " + strings.Join(expectedEnv, ".*") + "; then echo ERROR: unexpected env: $(env) >/dev/termination-log; exit 1 ; fi"},
             },
         },
         RestartPolicy: v1.RestartPolicyNever,
@@ -637,21 +654,36 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
         }
     }

     // Update claim status: set ReservedFor and AllocationResult
-    // NOTE: This is usually done by the DRA controller
-    resourceHandlers := make([]resourceapi.ResourceHandle, len(pluginNames))
-    for i, pluginName := range pluginNames {
-        resourceHandlers[i] = resourceapi.ResourceHandle{
-            DriverName: pluginName,
-            Data:       "{\"EnvVars\":{\"DRA_PARAM1\":\"PARAM1_VALUE\"},\"NodeName\":\"\"}",
+    // NOTE: This is usually done by the DRA controller or the scheduler.
+    results := make([]resourceapi.DeviceRequestAllocationResult, len(driverNames))
+    config := make([]resourceapi.DeviceAllocationConfiguration, len(driverNames))
+    for i, driverName := range driverNames {
+        results[i] = resourceapi.DeviceRequestAllocationResult{
+            Driver:  driverName,
+            Pool:    "some-pool",
+            Device:  "some-device",
+            Request: claim.Spec.Devices.Requests[0].Name,
+        }
+        config[i] = resourceapi.DeviceAllocationConfiguration{
+            Source: resourceapi.AllocationConfigSourceClaim,
+            DeviceConfiguration: resourceapi.DeviceConfiguration{
+                Opaque: &resourceapi.OpaqueDeviceConfiguration{
+                    Driver:     driverName,
+                    Parameters: runtime.RawExtension{Raw: []byte(`{"` + nameToEnv(driverName) + `":"PARAM1_VALUE"}`)},
+                },
+            },
         }
     }
     createdClaim.Status = resourceapi.ResourceClaimStatus{
-        DriverName: "controller",
         ReservedFor: []resourceapi.ResourceClaimConsumerReference{
             {Resource: "pods", Name: podName, UID: createdPod.UID},
         },
         Allocation: &resourceapi.AllocationResult{
-            ResourceHandles: resourceHandlers,
+            Devices: resourceapi.DeviceAllocationResult{
+                Results: results,
+                Config:  config,
+            },
         },
     }
     _, err = clientSet.ResourceV1alpha3().ResourceClaims(namespace).UpdateStatus(ctx, createdClaim, metav1.UpdateOptions{})
@@ -665,10 +697,13 @@ func createTestResourceSlice(ctx context.Context, clientSet kubernetes.Interface
         ObjectMeta: metav1.ObjectMeta{
             Name: nodeName,
         },
-        NodeName:   nodeName,
-        DriverName: driverName,
-        ResourceModel: resourceapi.ResourceModel{
-            NamedResources: &resourceapi.NamedResourcesResources{},
+        Spec: resourceapi.ResourceSliceSpec{
+            NodeName: nodeName,
+            Driver:   driverName,
+            Pool: resourceapi.ResourcePool{
+                Name:               nodeName,
+                ResourceSliceCount: 1,
+            },
         },
     }