From e5f25ccb620793b02455e3e1085fd0b6502a63e6 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 25 Sep 2023 15:08:22 +0200 Subject: [PATCH] k8s.io/dynamic-resource-allocation: fix compatibility with Kubernetes 1.27 Kubernetes 1.28 introduced generated resource claim names, with the actual name recorded in the pod status. As part of that change, the helper library for DRA drivers was updated to match that behavior. Since then, driver deployments were either compatible with Kubernetes 1.27 when using k8s.io/dynamic-resource-allocation 0.27 or Kubernetes 1.28 when using 0.28, but never both. This was okay because this is an alpha feature, but it makes testing of DRA drivers harder (in particular because cloud providers have not all updated to 1.28 yet) and can be fixed fairly easily. Therefore adding support for 1.27 is worthwhile and reasonable. --- .../controller/controller.go | 5 +- .../k8s.io/dynamic-resource-allocation/go.mod | 2 +- .../resourceclaim/resourceclaim.go | 75 ++++++++++++++++++- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go index da2941191cf..e3f8912429a 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go @@ -156,6 +156,7 @@ type controller struct { driver Driver setReservedFor bool kubeClient kubernetes.Interface + claimNameLookup *resourceclaim.Lookup queue workqueue.RateLimitingInterface eventRecorder record.EventRecorder rcLister resourcev1alpha2listers.ResourceClassLister @@ -180,6 +181,7 @@ func New( rcInformer := informerFactory.Resource().V1alpha2().ResourceClasses() claimInformer := informerFactory.Resource().V1alpha2().ResourceClaims() schedulingCtxInformer := informerFactory.Resource().V1alpha2().PodSchedulingContexts() + claimNameLookup := resourceclaim.NewNameLookup(kubeClient) eventBroadcaster := record.NewBroadcaster() go func() { @@ -218,6 +220,7 @@ func New( driver: driver, setReservedFor: true, kubeClient: kubeClient, + claimNameLookup: claimNameLookup, rcLister: rcInformer.Lister(), rcSynced: rcInformer.Informer().HasSynced, claimCache: claimCache, @@ -645,7 +648,7 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc } func (ctrl *controller) checkPodClaim(ctx context.Context, pod *v1.Pod, podClaim v1.PodResourceClaim) (*ClaimAllocation, error) { - claimName, mustCheckOwner, err := resourceclaim.Name(pod, &podClaim) + claimName, mustCheckOwner, err := ctrl.claimNameLookup.Name(pod, &podClaim) if err != nil { return nil, err } diff --git a/staging/src/k8s.io/dynamic-resource-allocation/go.mod b/staging/src/k8s.io/dynamic-resource-allocation/go.mod index bef4825c2a5..18b40003470 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/go.mod +++ b/staging/src/k8s.io/dynamic-resource-allocation/go.mod @@ -14,6 +14,7 @@ require ( k8s.io/client-go v0.0.0 k8s.io/klog/v2 v2.100.1 k8s.io/kubelet v0.0.0 + k8s.io/utils v0.0.0-20230726121419-3b25d923346b ) require ( @@ -50,7 +51,6 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/kube-openapi v0.0.0-20230905202853-d090da108d2f // indirect - k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.3.0 // indirect sigs.k8s.io/yaml v1.3.0 // indirect diff --git a/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/resourceclaim.go b/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/resourceclaim.go index 3fb1bced3d6..93d69695ef4 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/resourceclaim.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/resourceclaim.go @@ -26,10 +26,14 @@ package resourceclaim import ( "errors" "fmt" + "os" + "strings" v1 "k8s.io/api/core/v1" resourcev1alpha2 "k8s.io/api/resource/v1alpha2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/utils/ptr" ) var ( @@ -59,9 +63,7 @@ var ( // In this case the boolean determines whether IsForPod must be called // after retrieving the ResourceClaim and before using it. // -// If podClaim.Template is not nil, the caller must check that the -// ResourceClaim is indeed the one that was created for the Pod by calling -// IsUsable. +// Determining the name depends on Kubernetes >= 1.28. func Name(pod *v1.Pod, podClaim *v1.PodResourceClaim) (name *string, mustCheckOwner bool, err error) { switch { case podClaim.Source.ResourceClaimName != nil: @@ -78,6 +80,73 @@ func Name(pod *v1.Pod, podClaim *v1.PodResourceClaim) (name *string, mustCheckOw } } +// NewNameLookup returns an object which handles determining the name of +// a ResourceClaim. In contrast to the stand-alone Name it is compatible +// also with Kubernetes < 1.28. +// +// Providing a client is optional. If none is available, then code can pass nil +// and users can set the DRA_WITH_DETERMINISTIC_RESOURCE_CLAIM_NAMES env +// variable to an arbitrary non-empty value to use the naming from Kubernetes < +// 1.28. +func NewNameLookup(client kubernetes.Interface) *Lookup { + return &Lookup{client: client} +} + +// Lookup stores the state which is necessary to look up ResourceClaim names. +type Lookup struct { + client kubernetes.Interface + usePodStatus *bool +} + +// Name is a variant of the stand-alone Name with support also for Kubernetes < 1.28. +func (l *Lookup) Name(pod *v1.Pod, podClaim *v1.PodResourceClaim) (name *string, mustCheckOwner bool, err error) { + if l.usePodStatus == nil { + if value, _ := os.LookupEnv("DRA_WITH_DETERMINISTIC_RESOURCE_CLAIM_NAMES"); value != "" { + l.usePodStatus = ptr.To(false) + } else if l.client != nil { + // Check once. This does not detect upgrades or + // downgrades, but that is good enough for the simple + // test scenarios that the Kubernetes < 1.28 support is + // meant for. + info, err := l.client.Discovery().ServerVersion() + if err != nil { + return nil, false, fmt.Errorf("look up server version: %v", err) + } + if info.Major == "" { + // Fake client... + l.usePodStatus = ptr.To(true) + } else { + switch strings.Compare(info.Major, "1") { + case -1: + // Huh? + l.usePodStatus = ptr.To(false) + case 0: + // info.Minor may have a suffix which makes it larger than 28. + // We don't care about pre-releases here. + l.usePodStatus = ptr.To(strings.Compare("28", info.Minor) <= 0) + case 1: + // Kubernetes 2? Yeah! + l.usePodStatus = ptr.To(true) + } + } + } + } + + if *l.usePodStatus { + return Name(pod, podClaim) + } + + switch { + case podClaim.Source.ResourceClaimName != nil: + return podClaim.Source.ResourceClaimName, false, nil + case podClaim.Source.ResourceClaimTemplateName != nil: + name := pod.Name + "-" + podClaim.Name + return &name, true, nil + default: + return nil, false, fmt.Errorf(`pod "%s/%s", spec.resourceClaim %q: %w`, pod.Namespace, pod.Name, podClaim.Name, ErrAPIUnsupported) + } +} + // IsForPod checks that the ResourceClaim is the one that // was created for the Pod. It returns an error that is informative // enough to be returned by the caller without adding further details