Merge pull request #118574 from bart0sh/PR118-DRA-E2E-Node-test-grpc-timeout

DRA: E2E Node: test GRPC timeout
This commit is contained in:
Kubernetes Prow Robot 2023-06-13 03:39:58 -07:00 committed by GitHub
commit 8fd27c6137
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 1 deletions

View File

@ -17,6 +17,8 @@ limitations under the License.
package app package app
import ( import (
"strings"
"github.com/onsi/gomega/gcustom" "github.com/onsi/gomega/gcustom"
) )
@ -30,3 +32,13 @@ var BeRegistered = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error
} }
return false, nil return false, nil
}).WithMessage("contain successful NotifyRegistrationStatus call") }).WithMessage("contain successful NotifyRegistrationStatus call")
// NodePrepareResouceCalled checks that NodePrepareResource API has been called
var NodePrepareResourceCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
for _, call := range actualCalls {
if strings.HasSuffix(call.FullMethod, "/NodePrepareResource") && call.Err == nil {
return true, nil
}
}
return false, nil
}).WithMessage("contain NodePrepareResource call")

View File

@ -43,6 +43,8 @@ type ExamplePlugin struct {
mutex sync.Mutex mutex sync.Mutex
prepared map[ClaimID]bool prepared map[ClaimID]bool
gRPCCalls []GRPCCall gRPCCalls []GRPCCall
block bool
} }
type GRPCCall struct { type GRPCCall struct {
@ -135,6 +137,12 @@ func (ex *ExamplePlugin) IsRegistered() bool {
return status.PluginRegistered return status.PluginRegistered
} }
// Block sets a flag to block Node[Un]PrepareResources
// to emulate time consuming or stuck calls
func (ex *ExamplePlugin) Block() {
ex.block = true
}
// NodePrepareResource ensures that the CDI file for the claim exists. It uses // NodePrepareResource ensures that the CDI file for the claim exists. It uses
// a deterministic name to simplify NodeUnprepareResource (no need to remember // a deterministic name to simplify NodeUnprepareResource (no need to remember
// or discover the name) and idempotency (when called again, the file simply // or discover the name) and idempotency (when called again, the file simply
@ -142,6 +150,13 @@ func (ex *ExamplePlugin) IsRegistered() bool {
func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.NodePrepareResourceRequest) (*drapbv1.NodePrepareResourceResponse, error) { func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.NodePrepareResourceRequest) (*drapbv1.NodePrepareResourceResponse, error) {
logger := klog.FromContext(ctx) logger := klog.FromContext(ctx)
// Block to emulate plugin stuckness or slowness.
// By default the call will not be blocked as ex.block = false.
if ex.block {
<-ctx.Done()
return nil, ctx.Err()
}
// Determine environment variables. // Determine environment variables.
var p parameters var p parameters
if err := json.Unmarshal([]byte(req.ResourceHandle), &p); err != nil { if err := json.Unmarshal([]byte(req.ResourceHandle), &p); err != nil {
@ -202,6 +217,13 @@ func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.N
func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1.NodeUnprepareResourceRequest) (*drapbv1.NodeUnprepareResourceResponse, error) { func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1.NodeUnprepareResourceRequest) (*drapbv1.NodeUnprepareResourceResponse, error) {
logger := klog.FromContext(ctx) logger := klog.FromContext(ctx)
// Block to emulate plugin stuckness or slowness.
// By default the call will not be blocked as ex.block = false.
if ex.block {
<-ctx.Done()
return nil, ctx.Err()
}
filePath := ex.getJSONFilePath(req.ClaimUid) filePath := ex.getJSONFilePath(req.ClaimUid)
if err := ex.fileOps.Remove(filePath); err != nil { if err := ex.fileOps.Remove(filePath); err != nil {
return nil, fmt.Errorf("error removing CDI file: %w", err) return nil, fmt.Errorf("error removing CDI file: %w", err)

View File

@ -39,6 +39,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2" "k8s.io/klog/v2"
dra "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
admissionapi "k8s.io/pod-security-admission/api" admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
@ -55,6 +56,7 @@ const (
pluginRegistrationPath = "/var/lib/kubelet/plugins_registry" pluginRegistrationPath = "/var/lib/kubelet/plugins_registry"
draAddress = "/var/lib/kubelet/plugins/test-driver/dra.sock" draAddress = "/var/lib/kubelet/plugins/test-driver/dra.sock"
pluginRegistrationTimeout = time.Second * 60 // how long to wait for a node plugin to be registered pluginRegistrationTimeout = time.Second * 60 // how long to wait for a node plugin to be registered
podInPendingStateTimeout = time.Second * 60 // how long to wait for a pod to stay in pending state
) )
var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][NodeFeature:DynamicResourceAllocation]", func() { var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][NodeFeature:DynamicResourceAllocation]", func() {
@ -106,6 +108,27 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][Node
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout) err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout)
framework.ExpectNoError(err) framework.ExpectNoError(err)
}) })
ginkgo.It("must keep pod in pending state if NodePrepareResource times out", func(ctx context.Context) {
ginkgo.By("set delay for the NodePrepareResource call")
kubeletPlugin.Block()
pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod")
ginkgo.By("wait for pod to be in Pending state")
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) {
return pod.Status.Phase == v1.PodPending, nil
})
framework.ExpectNoError(err)
ginkgo.By("wait for NodePrepareResource call")
gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(dra.PluginClientTimeout * 2).Should(testdriver.NodePrepareResourceCalled)
// TODO: Check condition or event when implemented
// see https://github.com/kubernetes/kubernetes/issues/118468 for details
ginkgo.By("check that pod is consistently in Pending state")
gomega.Consistently(ctx, e2epod.Get(f.ClientSet, pod)).WithTimeout(podInPendingStateTimeout).Should(e2epod.BeInPhase(v1.PodPending),
"Pod should be in Pending state as resource preparation time outed")
})
}) })
}) })
@ -177,7 +200,8 @@ func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, node
containerName := "testcontainer" containerName := "testcontainer"
pod := &v1.Pod{ pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: podName, Name: podName,
Namespace: namespace,
}, },
Spec: v1.PodSpec{ Spec: v1.PodSpec{
NodeName: nodename, // Assign the node as the scheduler is not running NodeName: nodename, // Assign the node as the scheduler is not running