diff --git a/test/e2e/dra/test-driver/app/gomega.go b/test/e2e/dra/test-driver/app/gomega.go index 4caa7805041..7bb70984652 100644 --- a/test/e2e/dra/test-driver/app/gomega.go +++ b/test/e2e/dra/test-driver/app/gomega.go @@ -17,6 +17,8 @@ limitations under the License. package app import ( + "strings" + "github.com/onsi/gomega/gcustom" ) @@ -30,3 +32,13 @@ var BeRegistered = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error } return false, nil }).WithMessage("contain successful NotifyRegistrationStatus call") + +// NodePrepareResouceCalled checks that NodePrepareResource API has been called +var NodePrepareResourceCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) { + for _, call := range actualCalls { + if strings.HasSuffix(call.FullMethod, "/NodePrepareResource") && call.Err == nil { + return true, nil + } + } + return false, nil +}).WithMessage("contain NodePrepareResource call") diff --git a/test/e2e/dra/test-driver/app/kubeletplugin.go b/test/e2e/dra/test-driver/app/kubeletplugin.go index f899e8da404..a53de6b2116 100644 --- a/test/e2e/dra/test-driver/app/kubeletplugin.go +++ b/test/e2e/dra/test-driver/app/kubeletplugin.go @@ -43,6 +43,8 @@ type ExamplePlugin struct { mutex sync.Mutex prepared map[ClaimID]bool gRPCCalls []GRPCCall + + block bool } type GRPCCall struct { @@ -135,6 +137,12 @@ func (ex *ExamplePlugin) IsRegistered() bool { return status.PluginRegistered } +// Block sets a flag to block Node[Un]PrepareResources +// to emulate time consuming or stuck calls +func (ex *ExamplePlugin) Block() { + ex.block = true +} + // NodePrepareResource ensures that the CDI file for the claim exists. It uses // a deterministic name to simplify NodeUnprepareResource (no need to remember // or discover the name) and idempotency (when called again, the file simply @@ -142,6 +150,13 @@ func (ex *ExamplePlugin) IsRegistered() bool { func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.NodePrepareResourceRequest) (*drapbv1.NodePrepareResourceResponse, error) { logger := klog.FromContext(ctx) + // Block to emulate plugin stuckness or slowness. + // By default the call will not be blocked as ex.block = false. + if ex.block { + <-ctx.Done() + return nil, ctx.Err() + } + // Determine environment variables. var p parameters if err := json.Unmarshal([]byte(req.ResourceHandle), &p); err != nil { @@ -202,6 +217,13 @@ func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.N func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1.NodeUnprepareResourceRequest) (*drapbv1.NodeUnprepareResourceResponse, error) { logger := klog.FromContext(ctx) + // Block to emulate plugin stuckness or slowness. + // By default the call will not be blocked as ex.block = false. + if ex.block { + <-ctx.Done() + return nil, ctx.Err() + } + filePath := ex.getJSONFilePath(req.ClaimUid) if err := ex.fileOps.Remove(filePath); err != nil { return nil, fmt.Errorf("error removing CDI file: %w", err) diff --git a/test/e2e_node/dra_test.go b/test/e2e_node/dra_test.go index 2b283614fbe..b4ea0ef0b81 100644 --- a/test/e2e_node/dra_test.go +++ b/test/e2e_node/dra_test.go @@ -39,6 +39,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" + dra "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/kubernetes/test/e2e/framework" @@ -55,6 +56,7 @@ const ( pluginRegistrationPath = "/var/lib/kubelet/plugins_registry" draAddress = "/var/lib/kubelet/plugins/test-driver/dra.sock" pluginRegistrationTimeout = time.Second * 60 // how long to wait for a node plugin to be registered + podInPendingStateTimeout = time.Second * 60 // how long to wait for a pod to stay in pending state ) var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][NodeFeature:DynamicResourceAllocation]", func() { @@ -106,6 +108,27 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][Node err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout) framework.ExpectNoError(err) }) + + ginkgo.It("must keep pod in pending state if NodePrepareResource times out", func(ctx context.Context) { + ginkgo.By("set delay for the NodePrepareResource call") + kubeletPlugin.Block() + pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod") + + ginkgo.By("wait for pod to be in Pending state") + err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) { + return pod.Status.Phase == v1.PodPending, nil + }) + framework.ExpectNoError(err) + + ginkgo.By("wait for NodePrepareResource call") + gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(dra.PluginClientTimeout * 2).Should(testdriver.NodePrepareResourceCalled) + + // TODO: Check condition or event when implemented + // see https://github.com/kubernetes/kubernetes/issues/118468 for details + ginkgo.By("check that pod is consistently in Pending state") + gomega.Consistently(ctx, e2epod.Get(f.ClientSet, pod)).WithTimeout(podInPendingStateTimeout).Should(e2epod.BeInPhase(v1.PodPending), + "Pod should be in Pending state as resource preparation time outed") + }) }) }) @@ -177,7 +200,8 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node containerName := "testcontainer" pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: podName, + Name: podName, + Namespace: namespace, }, Spec: v1.PodSpec{ NodeName: nodename, // Assign the node as the scheduler is not running