Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-08-05 10:19:50 +00:00)
DRA e2e: adapt to v1alpha3 API
Commit 0b62bfb690 (parent 877829aeaa)
@@ -38,7 +38,6 @@ import (
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1alpha3"
	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -99,6 +98,7 @@ func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
	for _, node := range nodeList.Items {
		nodes.NodeNames = append(nodes.NodeNames, node.Name)
	}
	sort.Strings(nodes.NodeNames)
	framework.Logf("testing on nodes %v", nodes.NodeNames)

	// Watch claims in the namespace. This is useful for monitoring a test
@@ -153,7 +153,7 @@ func validateClaim(claim *resourceapi.ResourceClaim) {
// NewDriver sets up controller (as client of the cluster) and
// kubelet plugin (via proxy) before the test runs. It cleans
// up after the test.
func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources) *Driver {
func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources, devicesPerNode ...map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) *Driver {
	d := &Driver{
		f:    f,
		fail: map[MethodInstance]bool{},
@@ -169,7 +169,7 @@ func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() a
			resources.Nodes = nodes.NodeNames
		}
		ginkgo.DeferCleanup(d.IsGone) // Register first so it gets called last.
		d.SetUp(nodes, resources)
		d.SetUp(nodes, resources, devicesPerNode...)
		ginkgo.DeferCleanup(d.TearDown)
	})
	return d
@@ -195,13 +195,8 @@ type Driver struct {
	// In addition, there is one entry for a fictional node.
	Nodes map[string]KubeletPlugin

	parameterMode         parameterMode
	parameterAPIGroup     string
	parameterAPIVersion   string
	claimParameterAPIKind string
	classParameterAPIKind string

	NodeV1alpha3 bool
	parameterMode parameterMode // empty == parameterModeStructured
	NodeV1alpha3  bool

	mutex sync.Mutex
	fail  map[MethodInstance]bool
@@ -216,12 +211,11 @@ type KubeletPlugin struct {
type parameterMode string

const (
	parameterModeConfigMap  parameterMode = "configmap"  // ConfigMap parameters, control plane controller.
	parameterModeStructured parameterMode = "structured" // No ConfigMaps, directly create and reference in-tree parameter objects.
	parameterModeTranslated parameterMode = "translated" // Reference ConfigMaps in claim and class, generate in-tree parameter objects.
	parameterModeClassicDRA parameterMode = "classic"    // control plane controller
	parameterModeStructured parameterMode = "structured" // allocation through scheduler
)

func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
func (d *Driver) SetUp(nodes *Nodes, resources app.Resources, devicesPerNode ...map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) {
	ginkgo.By(fmt.Sprintf("deploying driver on nodes %v", nodes.NodeNames))
	d.Nodes = make(map[string]KubeletPlugin)
	d.Name = d.f.UniqueName + d.NameSuffix + ".k8s.io"
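The new variadic devicesPerNode parameter takes one map per node (matching the node order used once the proxy pods are sorted by node name), mapping device names to their attributes. A minimal sketch of building such an argument with the v1alpha3 types; the attribute names and values below are invented purely for illustration:

package main

import (
	"fmt"

	resourceapi "k8s.io/api/resource/v1alpha3"
	"k8s.io/utils/ptr"
)

// buildDevicesPerNode sketches the argument that NewDriver/SetUp now accept:
// one map per node, mapping device names to their attributes.
// The attribute names are hypothetical.
func buildDevicesPerNode() []map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute {
	return []map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
		{
			// Devices published by the plugin on the first node.
			"device-00": {
				"model":    {StringValue: ptr.To("model-a")},
				"healthy":  {BoolValue: ptr.To(true)},
				"capacity": {IntValue: ptr.To(int64(8))},
			},
		},
	}
}

func main() {
	fmt.Println(buildDevicesPerNode())
}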
@@ -236,8 +230,12 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
	d.ctx = ctx
	d.cleanup = append(d.cleanup, cancel)

	if d.parameterMode == "" {
		d.parameterMode = parameterModeStructured
	}

	switch d.parameterMode {
	case "", parameterModeConfigMap:
	case parameterModeClassicDRA:
		// The controller is easy: we simply connect to the API server.
		d.Controller = app.NewController(d.f.ClientSet, resources)
		d.wg.Add(1)
@@ -245,6 +243,49 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
			defer d.wg.Done()
			d.Controller.Run(d.ctx, 5 /* workers */)
		}()
	case parameterModeStructured:
		if !resources.NodeLocal {
			// Publish one resource pool with "network-attached" devices.
			slice := &resourceapi.ResourceSlice{
				ObjectMeta: metav1.ObjectMeta{
					Name: d.Name, // globally unique
				},
				Spec: resourceapi.ResourceSliceSpec{
					Driver: d.Name,
					Pool: resourceapi.ResourcePool{
						Name:               "network",
						Generation:         1,
						ResourceSliceCount: 1,
					},
					NodeSelector: &v1.NodeSelector{
						NodeSelectorTerms: []v1.NodeSelectorTerm{{
							MatchFields: []v1.NodeSelectorRequirement{{
								Key:      "metadata.name",
								Operator: v1.NodeSelectorOpIn,
								Values:   nodes.NodeNames,
							}},
						}},
					},
				},
			}
			maxAllocations := resources.MaxAllocations
			if maxAllocations <= 0 {
				// Cannot be empty, otherwise nothing runs.
				maxAllocations = 10
			}
			for i := 0; i < maxAllocations; i++ {
				slice.Spec.Devices = append(slice.Spec.Devices, resourceapi.Device{
					Name:  fmt.Sprintf("device-%d", i),
					Basic: &resourceapi.BasicDevice{},
				})
			}

			_, err := d.f.ClientSet.ResourceV1alpha3().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
			framework.ExpectNoError(err)
			ginkgo.DeferCleanup(func(ctx context.Context) {
				framework.ExpectNoError(d.f.ClientSet.ResourceV1alpha3().ResourceSlices().Delete(ctx, slice.Name, metav1.DeleteOptions{}))
			})
		}
	}

	manifests := []string{
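The slice above describes one pool of "network-attached" devices selected via a node selector. For comparison, a minimal sketch of the node-local variant that a kubelet plugin publishes for its own node, pinning the pool with spec.nodeName instead; driver and node names are placeholders:

package main

import (
	"fmt"

	resourceapi "k8s.io/api/resource/v1alpha3"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeLocalSlice sketches the node-local counterpart of the "network" pool:
// the pool is pinned to a single node via spec.nodeName, no NodeSelector.
// All names are illustrative.
func nodeLocalSlice(driverName, nodeName string, numDevices int) *resourceapi.ResourceSlice {
	slice := &resourceapi.ResourceSlice{
		ObjectMeta: metav1.ObjectMeta{
			// ResourceSlices are cluster-scoped, so the name must be unique.
			Name: driverName + "-" + nodeName,
		},
		Spec: resourceapi.ResourceSliceSpec{
			Driver:   driverName,
			NodeName: nodeName,
			Pool: resourceapi.ResourcePool{
				Name:               nodeName,
				Generation:         1,
				ResourceSliceCount: 1,
			},
		},
	}
	for i := 0; i < numDevices; i++ {
		slice.Spec.Devices = append(slice.Spec.Devices, resourceapi.Device{
			Name:  fmt.Sprintf("device-%d", i),
			Basic: &resourceapi.BasicDevice{},
		})
	}
	return slice
}

func main() {
	fmt.Println(nodeLocalSlice("test-driver.example.com", "worker-1", 2))
}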
@@ -252,24 +293,12 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
		// container names, etc.).
		"test/e2e/testing-manifests/dra/dra-test-driver-proxy.yaml",
	}
	if d.parameterMode == "" {
		d.parameterMode = parameterModeConfigMap
	}
	var numResourceInstances = -1 // disabled
	if d.parameterMode != parameterModeConfigMap {
		numResourceInstances = resources.MaxAllocations
	var numDevices = -1 // disabled
	if d.parameterMode != parameterModeClassicDRA && resources.NodeLocal {
		numDevices = resources.MaxAllocations
	}
	switch d.parameterMode {
	case parameterModeConfigMap, parameterModeTranslated:
		d.parameterAPIGroup = ""
		d.parameterAPIVersion = "v1"
		d.claimParameterAPIKind = "ConfigMap"
		d.classParameterAPIKind = "ConfigMap"
	case parameterModeStructured:
		d.parameterAPIGroup = "resource.k8s.io"
		d.parameterAPIVersion = "v1alpha3"
		d.claimParameterAPIKind = "ResourceClaimParameters"
		d.classParameterAPIKind = "ResourceClassParameters"
	case parameterModeClassicDRA, parameterModeStructured:
	default:
		framework.Failf("unknown test driver parameter mode: %s", d.parameterMode)
	}
@@ -314,10 +343,6 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
			item.Spec.Template.Spec.Volumes[2].HostPath.Path = path.Join(framework.TestContext.KubeletRootDir, "plugins_registry")
			item.Spec.Template.Spec.Containers[0].Args = append(item.Spec.Template.Spec.Containers[0].Args, "--endpoint=/plugins_registry/"+d.Name+"-reg.sock")
			item.Spec.Template.Spec.Containers[1].Args = append(item.Spec.Template.Spec.Containers[1].Args, "--endpoint=/dra/"+d.Name+".sock")
		case *apiextensionsv1.CustomResourceDefinition:
			item.Name = strings.ReplaceAll(item.Name, "dra.e2e.example.com", d.parameterAPIGroup)
			item.Spec.Group = d.parameterAPIGroup

		}
		return nil
	}, manifests...)
@@ -336,9 +361,12 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
	pods, err := d.f.ClientSet.CoreV1().Pods(d.f.Namespace.Name).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
	framework.ExpectNoError(err, "list proxy pods")
	gomega.Expect(numNodes).To(gomega.Equal(int32(len(pods.Items))), "number of proxy pods")
	sort.Slice(pods.Items, func(i, j int) bool {
		return pods.Items[i].Spec.NodeName < pods.Items[j].Spec.NodeName
	})

	// Run registrar and plugin for each of the pods.
	for _, pod := range pods.Items {
	for i, pod := range pods.Items {
		// Need a local variable, not the loop variable, for the anonymous
		// callback functions below.
		pod := pod
@@ -361,18 +389,23 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {

		logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", pod.Spec.NodeName, "pod", klog.KObj(&pod))
		loggerCtx := klog.NewContext(ctx, logger)
		plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, driverClient, nodename,
			app.FileOperations{
				Create: func(name string, content []byte) error {
					klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
					return d.createFile(&pod, name, content)
				},
				Remove: func(name string) error {
					klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
					return d.removeFile(&pod, name)
				},
				NumResourceInstances: numResourceInstances,
		fileOps := app.FileOperations{
			Create: func(name string, content []byte) error {
				klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
				return d.createFile(&pod, name, content)
			},
			Remove: func(name string) error {
				klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
				return d.removeFile(&pod, name)
			},
		}
		if i < len(devicesPerNode) {
			fileOps.Devices = devicesPerNode[i]
			fileOps.NumDevices = -1
		} else {
			fileOps.NumDevices = numDevices
		}
		plugin, err := app.StartPlugin(loggerCtx, "/cdi", d.Name, driverClient, nodename, fileOps,
			kubeletplugin.GRPCVerbosity(0),
			kubeletplugin.GRPCInterceptor(func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
				return d.interceptor(nodename, ctx, req, info, handler)
@@ -527,7 +560,7 @@ func (d *Driver) TearDown() {

func (d *Driver) IsGone(ctx context.Context) {
	gomega.Eventually(ctx, func(ctx context.Context) ([]resourceapi.ResourceSlice, error) {
		slices, err := d.f.ClientSet.ResourceV1alpha3().ResourceSlices().List(ctx, metav1.ListOptions{FieldSelector: "driverName=" + d.Name})
		slices, err := d.f.ClientSet.ResourceV1alpha3().ResourceSlices().List(ctx, metav1.ListOptions{FieldSelector: resourceapi.ResourceSliceSelectorDriver + "=" + d.Name})
		if err != nil {
			return nil, err
		}
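Listing the slices of one driver now goes through the typed field-selector constant from the v1alpha3 API instead of the old "driverName" key. A small sketch of that lookup, factored into a helper; the package and function names are illustrative:

package sketch

import (
	"context"
	"fmt"

	resourceapi "k8s.io/api/resource/v1alpha3"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// listSlicesForDriver returns all ResourceSlices published for one driver,
// using the ResourceSliceSelectorDriver field selector ("spec.driver").
func listSlicesForDriver(ctx context.Context, clientSet kubernetes.Interface, driverName string) ([]resourceapi.ResourceSlice, error) {
	list, err := clientSet.ResourceV1alpha3().ResourceSlices().List(ctx, metav1.ListOptions{
		FieldSelector: resourceapi.ResourceSliceSelectorDriver + "=" + driverName,
	})
	if err != nil {
		return nil, fmt.Errorf("list ResourceSlices of driver %q: %w", driverName, err)
	}
	return list.Items, nil
}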
test/e2e/dra/dra.go (1173 changed lines): file diff suppressed because it is too large.
@@ -14,6 +14,7 @@ nodes:
  scheduler:
    extraArgs:
      v: "5"
      vmodule: "allocator=6,dynamicresources=6" # structured/allocator.go, DRA scheduler plugin
  controllerManager:
    extraArgs:
      v: "5"
@ -20,16 +20,13 @@ package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
resourceapi "k8s.io/api/resource/v1alpha3"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/client-go/informers"
|
||||
@@ -48,7 +45,9 @@ type Resources struct {
	Nodes []string
	// NodeLabels are labels which determine on which nodes resources are
	// available. Mutually exclusive with Nodes.
	NodeLabels labels.Set
	NodeLabels labels.Set

	// Number of devices called "device-000", "device-001", ... on each node or in the cluster.
	MaxAllocations int

	// AllocateWrapper, if set, gets called for each Allocate call.
@@ -68,12 +67,16 @@ func (r Resources) AllNodes(nodeLister listersv1.NodeLister) []string {
	return r.Nodes
}

func (r Resources) NewAllocation(node string, data []byte) *resourceapi.AllocationResult {
	allocation := &resourceapi.AllocationResult{}
	allocation.ResourceHandles = []resourceapi.ResourceHandle{
		{
			DriverName: r.DriverName,
			Data:       string(data),
func (r Resources) newAllocation(requestName, node string, config []resourceapi.DeviceAllocationConfiguration) *resourceapi.AllocationResult {
	allocation := &resourceapi.AllocationResult{
		Devices: resourceapi.DeviceAllocationResult{
			Results: []resourceapi.DeviceRequestAllocationResult{{
				Driver:  r.DriverName,
				Pool:    "none",
				Request: requestName,
				Device:  "none",
			}},
			Config: config,
		},
	}
	if node == "" && len(r.NodeLabels) > 0 {
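For reference, a standalone sketch of the allocation shape that newAllocation produces with the v1alpha3 API: one DeviceRequestAllocationResult per request, plus the NodeSelector field that replaced AvailableOnNodes. Driver, pool, and device names are placeholders:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1alpha3"
)

// singleDeviceAllocation builds a v1alpha3 AllocationResult for one request
// and restricts it to a single node via the NodeSelector field.
func singleDeviceAllocation(driverName, requestName, nodeName string) *resourceapi.AllocationResult {
	return &resourceapi.AllocationResult{
		Devices: resourceapi.DeviceAllocationResult{
			Results: []resourceapi.DeviceRequestAllocationResult{{
				Request: requestName,
				Driver:  driverName,
				Pool:    "none",
				Device:  "none",
			}},
		},
		NodeSelector: &v1.NodeSelector{
			NodeSelectorTerms: []v1.NodeSelectorTerm{{
				MatchFields: []v1.NodeSelectorRequirement{{
					Key:      "metadata.name",
					Operator: v1.NodeSelectorOpIn,
					Values:   []string{nodeName},
				}},
			}},
		},
	}
}

func main() {
	fmt.Println(singleDeviceAllocation("test-driver.example.com", "req-0", "worker-1"))
}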
@ -86,7 +89,7 @@ func (r Resources) NewAllocation(node string, data []byte) *resourceapi.Allocati
|
||||
Values: []string{value},
|
||||
})
|
||||
}
|
||||
allocation.AvailableOnNodes = &v1.NodeSelector{
|
||||
allocation.NodeSelector = &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: requirements,
|
||||
@ -103,7 +106,7 @@ func (r Resources) NewAllocation(node string, data []byte) *resourceapi.Allocati
|
||||
nodes = r.Nodes
|
||||
}
|
||||
if len(nodes) > 0 {
|
||||
allocation.AvailableOnNodes = &v1.NodeSelector{
|
||||
allocation.NodeSelector = &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
@ -166,11 +169,6 @@ func (c *ExampleController) Run(ctx context.Context, workers int) {
|
||||
informerFactory.Shutdown()
|
||||
}
|
||||
|
||||
type parameters struct {
|
||||
EnvVars map[string]string
|
||||
NodeName string
|
||||
}
|
||||
|
||||
var _ controller.Driver = &ExampleController{}
|
||||
|
||||
// GetNumAllocations returns the number of times that a claim was allocated.
|
||||
@ -193,36 +191,6 @@ func (c *ExampleController) GetNumDeallocations() int64 {
|
||||
return c.numDeallocations
|
||||
}
|
||||
|
||||
func (c *ExampleController) GetClassParameters(ctx context.Context, class *resourceapi.ResourceClass) (interface{}, error) {
|
||||
if class.ParametersRef != nil {
|
||||
if class.ParametersRef.APIGroup != "" ||
|
||||
class.ParametersRef.Kind != "ConfigMap" {
|
||||
return nil, fmt.Errorf("class parameters are only supported in APIVersion v1, Kind ConfigMap, got: %v", class.ParametersRef)
|
||||
}
|
||||
return c.readParametersFromConfigMap(ctx, class.ParametersRef.Namespace, class.ParametersRef.Name)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *ExampleController) GetClaimParameters(ctx context.Context, claim *resourceapi.ResourceClaim, class *resourceapi.ResourceClass, classParameters interface{}) (interface{}, error) {
|
||||
if claim.Spec.ParametersRef != nil {
|
||||
if claim.Spec.ParametersRef.APIGroup != "" ||
|
||||
claim.Spec.ParametersRef.Kind != "ConfigMap" {
|
||||
return nil, fmt.Errorf("claim parameters are only supported in APIVersion v1, Kind ConfigMap, got: %v", claim.Spec.ParametersRef)
|
||||
}
|
||||
return c.readParametersFromConfigMap(ctx, claim.Namespace, claim.Spec.ParametersRef.Name)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *ExampleController) readParametersFromConfigMap(ctx context.Context, namespace, name string) (map[string]string, error) {
|
||||
configMap, err := c.clientset.CoreV1().ConfigMaps(namespace).Get(ctx, name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get config map: %w", err)
|
||||
}
|
||||
return configMap.Data, nil
|
||||
}
|
||||
|
||||
func (c *ExampleController) Allocate(ctx context.Context, claimAllocations []*controller.ClaimAllocation, selectedNode string) {
|
||||
|
||||
if c.resources.AllocateWrapper != nil {
|
||||
@ -236,7 +204,7 @@ func (c *ExampleController) Allocate(ctx context.Context, claimAllocations []*co
|
||||
|
||||
func (c *ExampleController) allocateOneByOne(ctx context.Context, claimAllocations []*controller.ClaimAllocation, selectedNode string) {
|
||||
for _, ca := range claimAllocations {
|
||||
allocationResult, err := c.allocateOne(ctx, ca.Claim, ca.ClaimParameters, ca.Class, ca.ClassParameters, selectedNode)
|
||||
allocationResult, err := c.allocateOne(ctx, ca.Claim, ca.DeviceClasses, selectedNode)
|
||||
if err != nil {
|
||||
ca.Error = err
|
||||
continue
|
||||
@ -246,12 +214,25 @@ func (c *ExampleController) allocateOneByOne(ctx context.Context, claimAllocatio
|
||||
}
|
||||
|
||||
// allocate simply copies parameters as JSON map into a ResourceHandle.
|
||||
func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.ResourceClaim, claimParameters interface{}, class *resourceapi.ResourceClass, classParameters interface{}, selectedNode string) (result *resourceapi.AllocationResult, err error) {
|
||||
func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.ResourceClaim, deviceClasses map[string]*resourceapi.DeviceClass, selectedNode string) (result *resourceapi.AllocationResult, err error) {
|
||||
logger := klog.LoggerWithValues(klog.LoggerWithName(klog.FromContext(ctx), "Allocate"), "claim", klog.KObj(claim), "uid", claim.UID)
|
||||
defer func() {
|
||||
logger.V(3).Info("done", "result", result, "err", err)
|
||||
}()
|
||||
|
||||
if len(claim.Spec.Devices.Requests) != 1 ||
|
||||
claim.Spec.Devices.Requests[0].DeviceClassName == "" ||
|
||||
claim.Spec.Devices.Requests[0].AllocationMode != resourceapi.DeviceAllocationModeExactCount ||
|
||||
claim.Spec.Devices.Requests[0].Count != 1 {
|
||||
return nil, errors.New("only claims requesting exactly one device are supported")
|
||||
}
|
||||
request := claim.Spec.Devices.Requests[0]
|
||||
class := deviceClasses[request.DeviceClassName]
|
||||
if len(request.Selectors) > 0 ||
|
||||
class != nil && len(class.Spec.Selectors) > 0 {
|
||||
return nil, errors.New("device selectors are not supported")
|
||||
}
|
||||
|
||||
c.mutex.Lock()
|
||||
defer c.mutex.Unlock()
|
||||
|
||||
@ -267,24 +248,7 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.
|
||||
nodes := c.resources.AllNodes(c.nodeLister)
|
||||
if c.resources.NodeLocal {
|
||||
node = selectedNode
|
||||
if node == "" {
|
||||
// If none has been selected because we do immediate allocation,
|
||||
// then we need to pick one ourselves.
|
||||
var viableNodes []string
|
||||
for _, n := range nodes {
|
||||
if c.resources.MaxAllocations == 0 ||
|
||||
c.claimsPerNode[n] < c.resources.MaxAllocations {
|
||||
viableNodes = append(viableNodes, n)
|
||||
}
|
||||
}
|
||||
if len(viableNodes) == 0 {
|
||||
return nil, errors.New("resources exhausted on all nodes")
|
||||
}
|
||||
// Pick randomly. We could also prefer the one with the least
|
||||
// number of allocations (even spreading) or the most (packing).
|
||||
node = viableNodes[rand.Intn(len(viableNodes))]
|
||||
logger.V(3).Info("picked a node ourselves", "selectedNode", selectedNode)
|
||||
} else if !contains(nodes, node) ||
|
||||
if !slices.Contains(nodes, node) ||
|
||||
c.resources.MaxAllocations > 0 &&
|
||||
c.claimsPerNode[node] >= c.resources.MaxAllocations {
|
||||
return nil, fmt.Errorf("resources exhausted on node %q", node)
|
||||
@ -297,17 +261,47 @@ func (c *ExampleController) allocateOne(ctx context.Context, claim *resourceapi.
|
||||
}
|
||||
}
|
||||
|
||||
p := parameters{
|
||||
EnvVars: make(map[string]string),
|
||||
NodeName: node,
|
||||
var configs []resourceapi.DeviceAllocationConfiguration
|
||||
for i, config := range claim.Spec.Devices.Config {
|
||||
if len(config.Requests) != 0 &&
|
||||
!slices.Contains(config.Requests, request.Name) {
|
||||
// Does not apply to request.
|
||||
continue
|
||||
}
|
||||
if config.Opaque == nil {
|
||||
return nil, fmt.Errorf("claim config #%d: only opaque configuration supported", i)
|
||||
}
|
||||
if config.Opaque.Driver != c.resources.DriverName {
|
||||
// Does not apply to driver.
|
||||
continue
|
||||
}
|
||||
// A normal driver would validate the config here. The test
|
||||
// driver just passes it through.
|
||||
configs = append(configs,
|
||||
resourceapi.DeviceAllocationConfiguration{
|
||||
Source: resourceapi.AllocationConfigSourceClaim,
|
||||
DeviceConfiguration: config.DeviceConfiguration,
|
||||
},
|
||||
)
|
||||
}
|
||||
toEnvVars("user", claimParameters, p.EnvVars)
|
||||
toEnvVars("admin", classParameters, p.EnvVars)
|
||||
data, err := json.Marshal(p)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("encode parameters: %w", err)
|
||||
if class != nil {
|
||||
for i, config := range class.Spec.Config {
|
||||
if config.Opaque == nil {
|
||||
return nil, fmt.Errorf("class config #%d: only opaque configuration supported", i)
|
||||
}
|
||||
if config.Opaque.Driver != c.resources.DriverName {
|
||||
// Does not apply to driver.
|
||||
continue
|
||||
}
|
||||
configs = append(configs,
|
||||
resourceapi.DeviceAllocationConfiguration{
|
||||
Source: resourceapi.AllocationConfigSourceClass,
|
||||
DeviceConfiguration: config.DeviceConfiguration,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
allocation := c.resources.NewAllocation(node, data)
|
||||
allocation := c.resources.newAllocation(request.Name, node, configs)
|
||||
if !alreadyAllocated {
|
||||
c.numAllocations++
|
||||
c.allocated[claim.UID] = node
|
||||
@ -359,7 +353,7 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
|
||||
// can only work if a node has capacity left
|
||||
// for all of them. Also, nodes that the driver
|
||||
// doesn't run on cannot be used.
|
||||
if !contains(nodes, node) ||
|
||||
if !slices.Contains(nodes, node) ||
|
||||
c.claimsPerNode[node]+len(claims) > c.resources.MaxAllocations {
|
||||
claim.UnsuitableNodes = append(claim.UnsuitableNodes, node)
|
||||
}
|
||||
@ -372,7 +366,7 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
|
||||
for _, claim := range claims {
|
||||
claim.UnsuitableNodes = nil
|
||||
for _, node := range potentialNodes {
|
||||
if !contains(nodes, node) ||
|
||||
if !slices.Contains(nodes, node) ||
|
||||
allocations+len(claims) > c.resources.MaxAllocations {
|
||||
claim.UnsuitableNodes = append(claim.UnsuitableNodes, node)
|
||||
}
|
||||
@ -381,24 +375,3 @@ func (c *ExampleController) UnsuitableNodes(ctx context.Context, pod *v1.Pod, cl
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func toEnvVars(what string, from interface{}, to map[string]string) {
|
||||
if from == nil {
|
||||
return
|
||||
}
|
||||
|
||||
env := from.(map[string]string)
|
||||
for key, value := range env {
|
||||
to[what+"_"+strings.ToLower(key)] = value
|
||||
}
|
||||
}
|
||||
|
||||
func contains[T comparable](list []T, value T) bool {
|
||||
for _, v := range list {
|
||||
if v == value {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
@ -23,6 +23,9 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
@ -46,15 +49,14 @@ type ExamplePlugin struct {
|
||||
d kubeletplugin.DRAPlugin
|
||||
fileOps FileOperations
|
||||
|
||||
cdiDir string
|
||||
driverName string
|
||||
nodeName string
|
||||
instances sets.Set[string]
|
||||
cdiDir string
|
||||
driverName string
|
||||
nodeName string
|
||||
deviceNames sets.Set[string]
|
||||
|
||||
mutex sync.Mutex
|
||||
instancesInUse sets.Set[string]
|
||||
prepared map[ClaimID][]string // instance names
|
||||
gRPCCalls []GRPCCall
|
||||
mutex sync.Mutex
|
||||
prepared map[ClaimID][]Device // prepared claims -> result of nodePrepareResource
|
||||
gRPCCalls []GRPCCall
|
||||
|
||||
blockPrepareResourcesMutex sync.Mutex
|
||||
blockUnprepareResourcesMutex sync.Mutex
|
||||
@ -88,11 +90,18 @@ type ClaimID struct {
|
||||
UID string
|
||||
}
|
||||
|
||||
type Device struct {
|
||||
PoolName string
|
||||
DeviceName string
|
||||
RequestName string
|
||||
CDIDeviceID string
|
||||
}
|
||||
|
||||
var _ drapb.NodeServer = &ExamplePlugin{}
|
||||
|
||||
// getJSONFilePath returns the absolute path where CDI file is/should be.
|
||||
func (ex *ExamplePlugin) getJSONFilePath(claimUID string) string {
|
||||
return filepath.Join(ex.cdiDir, fmt.Sprintf("%s-%s.json", ex.driverName, claimUID))
|
||||
func (ex *ExamplePlugin) getJSONFilePath(claimUID string, requestName string) string {
|
||||
return filepath.Join(ex.cdiDir, fmt.Sprintf("%s-%s-%s.json", ex.driverName, claimUID, requestName))
|
||||
}
|
||||
|
||||
// FileOperations defines optional callbacks for handling CDI files
@@ -105,10 +114,13 @@ type FileOperations struct {
	// file does not exist.
	Remove func(name string) error

	// NumResourceInstances determines whether the plugin reports resources
	// instances and how many. A negative value causes it to report "not implemented"
	// in the NodeListAndWatchResources gRPC call.
	NumResourceInstances int
	// NumDevices determines whether the plugin reports devices
	// and how many. It reports nothing if negative.
	NumDevices int

	// Pre-defined devices, with each device name mapped to
	// the device attributes. Not used if NumDevices >= 0.
	Devices map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute
}

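A short sketch of how such a pre-defined Devices map can be turned into the []resourceapi.Device list that the plugin publishes (mirroring the sorted conversion done further down in this file); the attribute name is illustrative:

package main

import (
	"fmt"
	"sort"

	resourceapi "k8s.io/api/resource/v1alpha3"
	"k8s.io/utils/ptr"
)

// devicesFromMap converts a device-name -> attributes map into a sorted
// device list, so that the published order is deterministic.
func devicesFromMap(m map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute) []resourceapi.Device {
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Strings(names)

	devices := make([]resourceapi.Device, 0, len(names))
	for _, name := range names {
		devices = append(devices, resourceapi.Device{
			Name:  name,
			Basic: &resourceapi.BasicDevice{Attributes: m[name]},
		})
	}
	return devices
}

func main() {
	fmt.Println(devicesFromMap(map[string]map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
		"device-00": {"model": {StringValue: ptr.To("model-a")}},
	}))
}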
// StartPlugin sets up the servers that are necessary for a DRA kubelet plugin.
|
||||
@ -129,22 +141,23 @@ func StartPlugin(ctx context.Context, cdiDir, driverName string, kubeClient kube
|
||||
}
|
||||
}
|
||||
ex := &ExamplePlugin{
|
||||
stopCh: ctx.Done(),
|
||||
logger: logger,
|
||||
kubeClient: kubeClient,
|
||||
fileOps: fileOps,
|
||||
cdiDir: cdiDir,
|
||||
driverName: driverName,
|
||||
nodeName: nodeName,
|
||||
instances: sets.New[string](),
|
||||
instancesInUse: sets.New[string](),
|
||||
prepared: make(map[ClaimID][]string),
|
||||
stopCh: ctx.Done(),
|
||||
logger: logger,
|
||||
kubeClient: kubeClient,
|
||||
fileOps: fileOps,
|
||||
cdiDir: cdiDir,
|
||||
driverName: driverName,
|
||||
nodeName: nodeName,
|
||||
prepared: make(map[ClaimID][]Device),
|
||||
deviceNames: sets.New[string](),
|
||||
}
|
||||
|
||||
for i := 0; i < ex.fileOps.NumResourceInstances; i++ {
|
||||
ex.instances.Insert(fmt.Sprintf("instance-%02d", i))
|
||||
for i := 0; i < ex.fileOps.NumDevices; i++ {
|
||||
ex.deviceNames.Insert(fmt.Sprintf("device-%02d", i))
|
||||
}
|
||||
for deviceName := range ex.fileOps.Devices {
|
||||
ex.deviceNames.Insert(deviceName)
|
||||
}
|
||||
|
||||
opts = append(opts,
|
||||
kubeletplugin.DriverName(driverName),
|
||||
kubeletplugin.NodeName(nodeName),
|
||||
@ -158,19 +171,30 @@ func StartPlugin(ctx context.Context, cdiDir, driverName string, kubeClient kube
|
||||
}
|
||||
ex.d = d
|
||||
|
||||
if fileOps.NumResourceInstances >= 0 {
|
||||
instances := make([]resourceapi.NamedResourcesInstance, ex.fileOps.NumResourceInstances)
|
||||
for i := 0; i < ex.fileOps.NumResourceInstances; i++ {
|
||||
instances[i].Name = fmt.Sprintf("instance-%02d", i)
|
||||
if fileOps.NumDevices >= 0 {
|
||||
devices := make([]resourceapi.Device, ex.fileOps.NumDevices)
|
||||
for i := 0; i < ex.fileOps.NumDevices; i++ {
|
||||
devices[i] = resourceapi.Device{
|
||||
Name: fmt.Sprintf("device-%02d", i),
|
||||
Basic: &resourceapi.BasicDevice{},
|
||||
}
|
||||
}
|
||||
nodeResources := []*resourceapi.ResourceModel{
|
||||
{
|
||||
NamedResources: &resourceapi.NamedResourcesResources{
|
||||
Instances: instances,
|
||||
},
|
||||
},
|
||||
resources := kubeletplugin.Resources{
|
||||
Devices: devices,
|
||||
}
|
||||
ex.d.PublishResources(ctx, nodeResources)
|
||||
ex.d.PublishResources(ctx, resources)
|
||||
} else if len(ex.fileOps.Devices) > 0 {
|
||||
devices := make([]resourceapi.Device, len(ex.fileOps.Devices))
|
||||
for i, deviceName := range sets.List(ex.deviceNames) {
|
||||
devices[i] = resourceapi.Device{
|
||||
Name: deviceName,
|
||||
Basic: &resourceapi.BasicDevice{Attributes: ex.fileOps.Devices[deviceName]},
|
||||
}
|
||||
}
|
||||
resources := kubeletplugin.Resources{
|
||||
Devices: devices,
|
||||
}
|
||||
ex.d.PublishResources(ctx, resources)
|
||||
}
|
||||
|
||||
return ex, nil
|
||||
@ -245,17 +269,15 @@ func (ex *ExamplePlugin) getUnprepareResourcesFailure() error {
|
||||
return ex.unprepareResourcesFailure
|
||||
}
|
||||
|
||||
// NodePrepareResource ensures that the CDI file for the claim exists. It uses
|
||||
// NodePrepareResource ensures that the CDI file(s) (one per request) for the claim exists. It uses
|
||||
// a deterministic name to simplify NodeUnprepareResource (no need to remember
|
||||
// or discover the name) and idempotency (when called again, the file simply
|
||||
// gets written again).
|
||||
func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimReq *drapb.Claim) ([]string, error) {
|
||||
func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimReq *drapb.Claim) ([]Device, error) {
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
// The plugin must retrieve the claim itself to get it in the version
|
||||
// that it understands.
|
||||
var resourceHandle string
|
||||
var structuredResourceHandle *resourceapi.StructuredResourceHandle
|
||||
claim, err := ex.kubeClient.ResourceV1alpha3().ResourceClaims(claimReq.Namespace).Get(ctx, claimReq.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("retrieve claim %s/%s: %w", claimReq.Namespace, claimReq.Name, err)
|
||||
@ -263,127 +285,113 @@ func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimReq *drap
|
||||
if claim.Status.Allocation == nil {
|
||||
return nil, fmt.Errorf("claim %s/%s not allocated", claimReq.Namespace, claimReq.Name)
|
||||
}
|
||||
if claim.UID != types.UID(claimReq.Uid) {
|
||||
if claim.UID != types.UID(claimReq.UID) {
|
||||
return nil, fmt.Errorf("claim %s/%s got replaced", claimReq.Namespace, claimReq.Name)
|
||||
}
|
||||
haveResources := false
|
||||
for _, handle := range claim.Status.Allocation.ResourceHandles {
|
||||
if handle.DriverName == ex.driverName {
|
||||
haveResources = true
|
||||
resourceHandle = handle.Data
|
||||
structuredResourceHandle = handle.StructuredData
|
||||
break
|
||||
}
|
||||
}
|
||||
if !haveResources {
|
||||
// Nothing to do.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
ex.mutex.Lock()
|
||||
defer ex.mutex.Unlock()
|
||||
ex.blockPrepareResourcesMutex.Lock()
|
||||
defer ex.blockPrepareResourcesMutex.Unlock()
|
||||
|
||||
deviceName := "claim-" + claimReq.Uid
|
||||
vendor := ex.driverName
|
||||
class := "test"
|
||||
dev := vendor + "/" + class + "=" + deviceName
|
||||
claimID := ClaimID{Name: claimReq.Name, UID: claimReq.Uid}
|
||||
if _, ok := ex.prepared[claimID]; ok {
|
||||
claimID := ClaimID{Name: claimReq.Name, UID: claimReq.UID}
|
||||
if result, ok := ex.prepared[claimID]; ok {
|
||||
// Idempotent call, nothing to do.
|
||||
return []string{dev}, nil
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Determine environment variables.
|
||||
var p parameters
|
||||
var instanceNames []string
|
||||
if structuredResourceHandle == nil {
|
||||
// Control plane controller did the allocation.
|
||||
if err := json.Unmarshal([]byte(resourceHandle), &p); err != nil {
|
||||
return nil, fmt.Errorf("unmarshal resource handle: %w", err)
|
||||
}
|
||||
} else {
|
||||
// Scheduler did the allocation with structured parameters.
|
||||
p.NodeName = structuredResourceHandle.NodeName
|
||||
if err := extractParameters(structuredResourceHandle.VendorClassParameters, &p.EnvVars, "admin"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := extractParameters(structuredResourceHandle.VendorClaimParameters, &p.EnvVars, "user"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, result := range structuredResourceHandle.Results {
|
||||
if err := extractParameters(result.VendorRequestParameters, &p.EnvVars, "user"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
namedResources := result.NamedResources
|
||||
if namedResources == nil {
|
||||
return nil, errors.New("missing named resources allocation result")
|
||||
}
|
||||
instanceName := namedResources.Name
|
||||
if instanceName == "" {
|
||||
return nil, errors.New("empty named resources instance name")
|
||||
}
|
||||
if !ex.instances.Has(instanceName) {
|
||||
return nil, fmt.Errorf("unknown allocated instance %q", instanceName)
|
||||
}
|
||||
if ex.instancesInUse.Has(instanceName) {
|
||||
return nil, fmt.Errorf("resource instance %q used more than once", instanceName)
|
||||
}
|
||||
instanceNames = append(instanceNames, instanceName)
|
||||
}
|
||||
}
|
||||
var devices []Device
|
||||
for _, result := range claim.Status.Allocation.Devices.Results {
|
||||
requestName := result.Request
|
||||
|
||||
// Sanity check scheduling.
|
||||
if p.NodeName != "" && ex.nodeName != "" && p.NodeName != ex.nodeName {
|
||||
return nil, fmt.Errorf("claim was allocated for %q, cannot be prepared on %q", p.NodeName, ex.nodeName)
|
||||
}
|
||||
// The driver joins all env variables in the order in which
|
||||
// they appear in results (last one wins).
|
||||
env := make(map[string]string)
|
||||
for i, config := range claim.Status.Allocation.Devices.Config {
|
||||
if config.Opaque == nil ||
|
||||
config.Opaque.Driver != ex.driverName ||
|
||||
len(config.Requests) > 0 && !slices.Contains(config.Requests, requestName) {
|
||||
continue
|
||||
}
|
||||
if err := extractParameters(config.Opaque.Parameters, &env, config.Source == resourceapi.AllocationConfigSourceClass); err != nil {
|
||||
return nil, fmt.Errorf("parameters in config #%d: %w", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
// CDI wants env variables as set of strings.
|
||||
envs := []string{}
|
||||
for key, val := range p.EnvVars {
|
||||
envs = append(envs, key+"="+val)
|
||||
}
|
||||
// It also sets a claim_<claim name>_<request name>=true env variable.
|
||||
// This can be used to identify which devices where mapped into a container.
|
||||
claimReqName := "claim_" + claim.Name + "_" + requestName
|
||||
claimReqName = regexp.MustCompile(`[^a-zA-Z0-9]`).ReplaceAllString(claimReqName, "_")
|
||||
env[claimReqName] = "true"
|
||||
|
||||
spec := &spec{
|
||||
Version: "0.3.0", // This has to be a version accepted by the runtimes.
|
||||
Kind: vendor + "/" + class,
|
||||
// At least one device is required and its entry must have more
|
||||
// than just the name.
|
||||
Devices: []device{
|
||||
{
|
||||
Name: deviceName,
|
||||
ContainerEdits: containerEdits{
|
||||
Env: envs,
|
||||
deviceName := "claim-" + claimReq.UID + "-" + requestName
|
||||
vendor := ex.driverName
|
||||
class := "test"
|
||||
cdiDeviceID := vendor + "/" + class + "=" + deviceName
|
||||
|
||||
// CDI wants env variables as set of strings.
|
||||
envs := []string{}
|
||||
for key, val := range env {
|
||||
envs = append(envs, key+"="+val)
|
||||
}
|
||||
sort.Strings(envs)
|
||||
|
||||
if len(envs) == 0 {
|
||||
// CDI does not support empty ContainerEdits. For example,
|
||||
// kubelet+crio then fail with:
|
||||
// CDI device injection failed: unresolvable CDI devices ...
|
||||
//
|
||||
// Inject nothing instead, which is supported by DRA.
|
||||
continue
|
||||
}
|
||||
|
||||
spec := &spec{
|
||||
Version: "0.3.0", // This has to be a version accepted by the runtimes.
|
||||
Kind: vendor + "/" + class,
|
||||
// At least one device is required and its entry must have more
|
||||
// than just the name.
|
||||
Devices: []device{
|
||||
{
|
||||
Name: deviceName,
|
||||
ContainerEdits: containerEdits{
|
||||
Env: envs,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
filePath := ex.getJSONFilePath(claimReq.Uid)
|
||||
buffer, err := json.Marshal(spec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal spec: %w", err)
|
||||
}
|
||||
if err := ex.fileOps.Create(filePath, buffer); err != nil {
|
||||
return nil, fmt.Errorf("failed to write CDI file %v", err)
|
||||
}
|
||||
filePath := ex.getJSONFilePath(claimReq.UID, requestName)
|
||||
buffer, err := json.Marshal(spec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal spec: %w", err)
|
||||
}
|
||||
if err := ex.fileOps.Create(filePath, buffer); err != nil {
|
||||
return nil, fmt.Errorf("failed to write CDI file: %w", err)
|
||||
}
|
||||
device := Device{
|
||||
PoolName: result.Pool,
|
||||
DeviceName: result.Device,
|
||||
RequestName: requestName,
|
||||
CDIDeviceID: cdiDeviceID,
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
|
||||
ex.prepared[claimID] = instanceNames
|
||||
for _, instanceName := range instanceNames {
|
||||
ex.instancesInUse.Insert(instanceName)
|
||||
}
|
||||
|
||||
logger.V(3).Info("CDI file created", "path", filePath, "device", dev)
|
||||
return []string{dev}, nil
|
||||
logger.V(3).Info("CDI file(s) created", "devices", devices)
|
||||
ex.prepared[claimID] = devices
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func extractParameters(parameters runtime.RawExtension, env *map[string]string, kind string) error {
func extractParameters(parameters runtime.RawExtension, env *map[string]string, admin bool) error {
	if len(parameters.Raw) == 0 {
		return nil
	}
	kind := "user"
	if admin {
		kind = "admin"
	}
	var data map[string]string
	if err := json.Unmarshal(parameters.Raw, &data); err != nil {
		return fmt.Errorf("decoding %s parameters: %v", kind, err)
		return fmt.Errorf("decoding %s parameters: %w", kind, err)
	}
	if len(data) > 0 && *env == nil {
		*env = make(map[string]string)
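A self-contained sketch of the visible part of this helper: opaque claim/class configuration arrives as runtime.RawExtension and is decoded into a flat string map, with admin vs. user only affecting the error text. How the decoded map is merged into env is truncated in the diff above, so that step is intentionally left out:

package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
)

// decodeOpaqueParameters decodes the opaque parameters of a claim or class
// config into a flat string map. The merge into an env map (as done by the
// real extractParameters, per its signature) is omitted here.
func decodeOpaqueParameters(parameters runtime.RawExtension, admin bool) (map[string]string, error) {
	if len(parameters.Raw) == 0 {
		return nil, nil
	}
	kind := "user"
	if admin {
		kind = "admin"
	}
	var data map[string]string
	if err := json.Unmarshal(parameters.Raw, &data); err != nil {
		return nil, fmt.Errorf("decoding %s parameters: %w", kind, err)
	}
	return data, nil
}

func main() {
	raw := runtime.RawExtension{Raw: []byte(`{"DRA_PARAM1":"PARAM1_VALUE"}`)}
	data, err := decodeOpaqueParameters(raw, false)
	if err != nil {
		panic(err)
	}
	fmt.Println(data) // map[DRA_PARAM1:PARAM1_VALUE]
}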
@ -404,15 +412,23 @@ func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapb.No
|
||||
}
|
||||
|
||||
for _, claimReq := range req.Claims {
|
||||
cdiDevices, err := ex.nodePrepareResource(ctx, claimReq)
|
||||
devices, err := ex.nodePrepareResource(ctx, claimReq)
|
||||
if err != nil {
|
||||
resp.Claims[claimReq.Uid] = &drapb.NodePrepareResourceResponse{
|
||||
resp.Claims[claimReq.UID] = &drapb.NodePrepareResourceResponse{
|
||||
Error: err.Error(),
|
||||
}
|
||||
} else {
|
||||
resp.Claims[claimReq.Uid] = &drapb.NodePrepareResourceResponse{
|
||||
CDIDevices: cdiDevices,
|
||||
r := &drapb.NodePrepareResourceResponse{}
|
||||
for _, device := range devices {
|
||||
pbDevice := &drapb.Device{
|
||||
PoolName: device.PoolName,
|
||||
DeviceName: device.DeviceName,
|
||||
RequestNames: []string{device.RequestName},
|
||||
CDIDeviceIDs: []string{device.CDIDeviceID},
|
||||
}
|
||||
r.Devices = append(r.Devices, pbDevice)
|
||||
}
|
||||
resp.Claims[claimReq.UID] = r
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
@ -427,27 +443,23 @@ func (ex *ExamplePlugin) nodeUnprepareResource(ctx context.Context, claimReq *dr
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
filePath := ex.getJSONFilePath(claimReq.Uid)
|
||||
if err := ex.fileOps.Remove(filePath); err != nil {
|
||||
return fmt.Errorf("error removing CDI file: %w", err)
|
||||
}
|
||||
logger.V(3).Info("CDI file removed", "path", filePath)
|
||||
|
||||
ex.mutex.Lock()
|
||||
defer ex.mutex.Unlock()
|
||||
|
||||
claimID := ClaimID{Name: claimReq.Name, UID: claimReq.Uid}
|
||||
instanceNames, ok := ex.prepared[claimID]
|
||||
claimID := ClaimID{Name: claimReq.Name, UID: claimReq.UID}
|
||||
devices, ok := ex.prepared[claimID]
|
||||
if !ok {
|
||||
// Idempotent call, nothing to do.
|
||||
return nil
|
||||
}
|
||||
|
||||
delete(ex.prepared, claimID)
|
||||
for _, instanceName := range instanceNames {
|
||||
ex.instancesInUse.Delete(instanceName)
|
||||
for _, device := range devices {
|
||||
filePath := ex.getJSONFilePath(claimReq.UID, device.RequestName)
|
||||
if err := ex.fileOps.Remove(filePath); err != nil {
|
||||
return fmt.Errorf("error removing CDI file: %w", err)
|
||||
}
|
||||
logger.V(3).Info("CDI file removed", "path", filePath)
|
||||
}
|
||||
|
||||
delete(ex.prepared, claimID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -463,11 +475,11 @@ func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapb.
|
||||
for _, claimReq := range req.Claims {
|
||||
err := ex.nodeUnprepareResource(ctx, claimReq)
|
||||
if err != nil {
|
||||
resp.Claims[claimReq.Uid] = &drapb.NodeUnprepareResourceResponse{
|
||||
resp.Claims[claimReq.UID] = &drapb.NodeUnprepareResourceResponse{
|
||||
Error: err.Error(),
|
||||
}
|
||||
} else {
|
||||
resp.Claims[claimReq.Uid] = &drapb.NodeUnprepareResourceResponse{}
|
||||
resp.Claims[claimReq.UID] = &drapb.NodeUnprepareResourceResponse{}
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
test/e2e/dra/test-driver/deploy/example/deviceclass.yaml (new file, 8 lines)
@@ -0,0 +1,8 @@
apiVersion: resource.k8s.io/v1alpha3
kind: DeviceClass
metadata:
  name: example
spec:
  selectors:
  - cel:
      expression: device.driver == "test-driver.cdi.k8s.io"
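The same DeviceClass built programmatically with the v1alpha3 Go types, a sketch that may be handy in tests that create the class from code rather than from a manifest:

package main

import (
	"fmt"

	resourceapi "k8s.io/api/resource/v1alpha3"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// exampleDeviceClass mirrors the manifest above: a DeviceClass whose CEL
// selector matches devices published by the test driver.
func exampleDeviceClass() *resourceapi.DeviceClass {
	return &resourceapi.DeviceClass{
		ObjectMeta: metav1.ObjectMeta{
			Name: "example",
		},
		Spec: resourceapi.DeviceClassSpec{
			Selectors: []resourceapi.DeviceSelector{{
				CEL: &resourceapi.CELDeviceSelector{
					Expression: `device.driver == "test-driver.cdi.k8s.io"`,
				},
			}},
		},
	}
}

func main() {
	fmt.Println(exampleDeviceClass())
}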
@@ -47,7 +47,7 @@ spec:
  matchConstraints:
    resourceRules:
    - apiGroups: ["resource.k8s.io"]
      apiVersions: ["v1alpha2"]
      apiVersions: ["v1alpha3"]
      operations: ["CREATE", "UPDATE", "DELETE"]
      resources: ["resourceslices"]
  variables:
@@ -59,7 +59,7 @@ spec:
      request.userInfo.username == "system:serviceaccount:dra-kubelet-plugin-namespace:dra-kubelet-plugin-service-account"
  - name: objectNodeName
    expression: >-
      (request.operation == "DELETE" ? oldObject : object).?nodeName.orValue("")
      (request.operation == "DELETE" ? oldObject : object).spec.?nodeName.orValue("")
  validations:
  - expression: >-
      !variables.isKubeletPlugin || variables.hasNodeName
@@ -1,18 +1,10 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: example-claim-parameters
  namespace: default
data:
  a: b
---
apiVersion: resource.k8s.io/v1alpha3
kind: ResourceClaim
metadata:
  name: example
  namespace: default
spec:
  resourceClassName: example
  parametersRef:
    kind: ConfigMap
    name: example-claim-parameters
  devices:
    requests:
    - name: req-0
      deviceClassName: example
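The equivalent claim in Go: with v1alpha3 the claim no longer names a resource class or a parameters ConfigMap; it lists device requests, each referencing a DeviceClass:

package main

import (
	"fmt"

	resourceapi "k8s.io/api/resource/v1alpha3"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// exampleResourceClaim mirrors the manifest above: one request for a device
// of the "example" DeviceClass.
func exampleResourceClaim() *resourceapi.ResourceClaim {
	return &resourceapi.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "example",
			Namespace: "default",
		},
		Spec: resourceapi.ResourceClaimSpec{
			Devices: resourceapi.DeviceClaim{
				Requests: []resourceapi.DeviceRequest{{
					Name:            "req-0",
					DeviceClassName: "example",
				}},
			},
		},
	}
}

func main() {
	fmt.Println(exampleResourceClaim())
}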
@@ -1,7 +0,0 @@
apiVersion: resource.k8s.io/v1alpha3
kind: ResourceClass
metadata:
  name: example
driverName: test-driver.cdi.k8s.io
# TODO:
# parameters
@ -30,17 +30,20 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/onsi/ginkgo/v2"
|
||||
"github.com/onsi/gomega"
|
||||
"github.com/onsi/gomega/gstruct"
|
||||
"github.com/onsi/gomega/types"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
resourceapi "k8s.io/api/resource/v1alpha3"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
draplugin "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
|
||||
@ -417,10 +420,9 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
|
||||
})
|
||||
|
||||
ginkgo.It("must run pod if NodePrepareResources is in progress for one plugin when Kubelet restarts", func(ctx context.Context) {
|
||||
_, kubeletPlugin2 := start(ctx)
|
||||
kubeletPlugin := newKubeletPlugin(ctx, f.ClientSet, getNodeName(ctx, f), driverName)
|
||||
kubeletPlugin1, kubeletPlugin2 := start(ctx)
|
||||
|
||||
unblock := kubeletPlugin.BlockNodePrepareResources()
|
||||
unblock := kubeletPlugin1.BlockNodePrepareResources()
|
||||
pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod", true, []string{kubeletPlugin1Name, kubeletPlugin2Name})
|
||||
|
||||
ginkgo.By("wait for pod to be in Pending state")
|
||||
@ -478,9 +480,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
|
||||
}
|
||||
|
||||
matchResourcesByNodeName := func(nodeName string) types.GomegaMatcher {
|
||||
return gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
|
||||
"NodeName": gomega.Equal(nodeName),
|
||||
})
|
||||
return gomega.HaveField("Spec.NodeName", gomega.Equal(nodeName))
|
||||
}
|
||||
|
||||
f.It("must be removed on kubelet startup", f.WithDisruptive(), func(ctx context.Context) {
|
||||
@ -562,7 +562,7 @@ func newKubeletPlugin(ctx context.Context, clientSet kubernetes.Interface, nodeN
|
||||
ginkgo.DeferCleanup(func(ctx context.Context) {
|
||||
// kubelet should do this eventually, but better make sure.
|
||||
// A separate test checks this explicitly.
|
||||
framework.ExpectNoError(clientSet.ResourceV1alpha3().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: "driverName=" + driverName}))
|
||||
framework.ExpectNoError(clientSet.ResourceV1alpha3().ResourceSlices().DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{FieldSelector: resourceapi.ResourceSliceSelectorDriver + "=" + driverName}))
|
||||
})
|
||||
ginkgo.DeferCleanup(plugin.Stop)
|
||||
|
||||
@ -573,18 +573,17 @@ func newKubeletPlugin(ctx context.Context, clientSet kubernetes.Interface, nodeN
|
||||
// NOTE: as scheduler and controller manager are not running by the Node e2e,
|
||||
// the objects must contain all required data to be processed correctly by the API server
|
||||
// and placed on the node without involving the scheduler and the DRA controller
|
||||
func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string, deferPodDeletion bool, pluginNames []string) *v1.Pod {
|
||||
// ResourceClass
|
||||
class := &resourceapi.ResourceClass{
|
||||
func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string, deferPodDeletion bool, driverNames []string) *v1.Pod {
|
||||
// DeviceClass
|
||||
class := &resourceapi.DeviceClass{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: className,
|
||||
},
|
||||
DriverName: "controller",
|
||||
}
|
||||
_, err := clientSet.ResourceV1alpha3().ResourceClasses().Create(ctx, class, metav1.CreateOptions{})
|
||||
_, err := clientSet.ResourceV1alpha3().DeviceClasses().Create(ctx, class, metav1.CreateOptions{})
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
ginkgo.DeferCleanup(clientSet.ResourceV1alpha3().ResourceClasses().Delete, className, metav1.DeleteOptions{})
|
||||
ginkgo.DeferCleanup(clientSet.ResourceV1alpha3().DeviceClasses().Delete, className, metav1.DeleteOptions{})
|
||||
|
||||
// ResourceClaim
|
||||
podClaimName := "resource-claim"
|
||||
@ -593,7 +592,12 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
|
||||
Name: claimName,
|
||||
},
|
||||
Spec: resourceapi.ResourceClaimSpec{
|
||||
ResourceClassName: className,
|
||||
Devices: resourceapi.DeviceClaim{
|
||||
Requests: []resourceapi.DeviceRequest{{
|
||||
Name: "my-request",
|
||||
DeviceClassName: className,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
createdClaim, err := clientSet.ResourceV1alpha3().ResourceClaims(namespace).Create(ctx, claim, metav1.CreateOptions{})
|
||||
@ -601,7 +605,18 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
|
||||
|
||||
ginkgo.DeferCleanup(clientSet.ResourceV1alpha3().ResourceClaims(namespace).Delete, claimName, metav1.DeleteOptions{})
|
||||
|
||||
	// Pod
	// The pod checks its own env with grep. Each driver injects its own parameters,
	// with the driver name as part of the variable name. Sorting ensures that a
	// single grep can match the output of env when that gets turned into a single
	// line because the order is deterministic.
	nameToEnv := func(driverName string) string {
		return "DRA_" + regexp.MustCompile(`[^a-z0-9]`).ReplaceAllString(driverName, "_")
	}
	var expectedEnv []string
	sort.Strings(driverNames)
	for _, driverName := range driverNames {
		expectedEnv = append(expectedEnv, nameToEnv(driverName)+"=PARAM1_VALUE")
	}
	containerName := "testcontainer"
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
@@ -623,7 +638,9 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
				Resources: v1.ResourceRequirements{
					Claims: []v1.ResourceClaim{{Name: podClaimName}},
				},
				Command: []string{"/bin/sh", "-c", "env | grep DRA_PARAM1=PARAM1_VALUE"},
				// If injecting env variables fails, the pod fails and this error shows up in
				// ... Terminated:&ContainerStateTerminated{ExitCode:1,Signal:0,Reason:Error,Message:ERROR: ...
				Command: []string{"/bin/sh", "-c", "if ! echo $(env) | grep -q " + strings.Join(expectedEnv, ".*") + "; then echo ERROR: unexpected env: $(env) >/dev/termination-log; exit 1 ; fi"},
			},
		},
		RestartPolicy: v1.RestartPolicyNever,
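A small worked example of the naming scheme: for a driver name like "test-driver.cdi.k8s.io", nameToEnv yields "DRA_test_driver_cdi_k8s_io", and the joined pattern is what the grep in the container command matches against. The driver names below are placeholders:

package main

import (
	"fmt"
	"regexp"
	"sort"
	"strings"
)

// Demonstrates the env-variable naming and the grep pattern built above.
func main() {
	nameToEnv := func(driverName string) string {
		return "DRA_" + regexp.MustCompile(`[^a-z0-9]`).ReplaceAllString(driverName, "_")
	}

	driverNames := []string{"test-driver.cdi.k8s.io", "another-driver.cdi.k8s.io"}
	sort.Strings(driverNames)

	var expectedEnv []string
	for _, driverName := range driverNames {
		expectedEnv = append(expectedEnv, nameToEnv(driverName)+"=PARAM1_VALUE")
	}
	// Prints e.g.:
	// DRA_another_driver_cdi_k8s_io=PARAM1_VALUE.*DRA_test_driver_cdi_k8s_io=PARAM1_VALUE
	fmt.Println(strings.Join(expectedEnv, ".*"))
}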
@ -637,21 +654,36 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
|
||||
}
|
||||
|
||||
// Update claim status: set ReservedFor and AllocationResult
|
||||
// NOTE: This is usually done by the DRA controller
|
||||
resourceHandlers := make([]resourceapi.ResourceHandle, len(pluginNames))
|
||||
for i, pluginName := range pluginNames {
|
||||
resourceHandlers[i] = resourceapi.ResourceHandle{
|
||||
DriverName: pluginName,
|
||||
Data: "{\"EnvVars\":{\"DRA_PARAM1\":\"PARAM1_VALUE\"},\"NodeName\":\"\"}",
|
||||
// NOTE: This is usually done by the DRA controller or the scheduler.
|
||||
results := make([]resourceapi.DeviceRequestAllocationResult, len(driverNames))
|
||||
config := make([]resourceapi.DeviceAllocationConfiguration, len(driverNames))
|
||||
for i, driverName := range driverNames {
|
||||
results[i] = resourceapi.DeviceRequestAllocationResult{
|
||||
Driver: driverName,
|
||||
Pool: "some-pool",
|
||||
Device: "some-device",
|
||||
Request: claim.Spec.Devices.Requests[0].Name,
|
||||
}
|
||||
config[i] = resourceapi.DeviceAllocationConfiguration{
|
||||
Source: resourceapi.AllocationConfigSourceClaim,
|
||||
DeviceConfiguration: resourceapi.DeviceConfiguration{
|
||||
Opaque: &resourceapi.OpaqueDeviceConfiguration{
|
||||
Driver: driverName,
|
||||
Parameters: runtime.RawExtension{Raw: []byte(`{"` + nameToEnv(driverName) + `":"PARAM1_VALUE"}`)},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
createdClaim.Status = resourceapi.ResourceClaimStatus{
|
||||
DriverName: "controller",
|
||||
ReservedFor: []resourceapi.ResourceClaimConsumerReference{
|
||||
{Resource: "pods", Name: podName, UID: createdPod.UID},
|
||||
},
|
||||
Allocation: &resourceapi.AllocationResult{
|
||||
ResourceHandles: resourceHandlers,
|
||||
Devices: resourceapi.DeviceAllocationResult{
|
||||
Results: results,
|
||||
Config: config,
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err = clientSet.ResourceV1alpha3().ResourceClaims(namespace).UpdateStatus(ctx, createdClaim, metav1.UpdateOptions{})
|
||||
@ -665,10 +697,13 @@ func createTestResourceSlice(ctx context.Context, clientSet kubernetes.Interface
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeName,
|
||||
},
|
||||
NodeName: nodeName,
|
||||
DriverName: driverName,
|
||||
ResourceModel: resourceapi.ResourceModel{
|
||||
NamedResources: &resourceapi.NamedResourcesResources{},
|
||||
Spec: resourceapi.ResourceSliceSpec{
|
||||
NodeName: nodeName,
|
||||
Driver: driverName,
|
||||
Pool: resourceapi.ResourcePool{
|
||||
Name: nodeName,
|
||||
ResourceSliceCount: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|