Mirror of https://github.com/k3s-io/kubernetes.git
DRA: add node tests
- Setup overall test structure
- Tested Kubelet plugin re-registration on plugin and Kubelet restarts
- Tested pod processing on Kubelet start
This commit is contained in:
parent f812ac5445
commit 58162ffd63
test/e2e_node/dra_test.go | 230 | normal file
@@ -0,0 +1,230 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
E2E Node test for DRA (Dynamic Resource Allocation)
This test covers node-specific aspects of DRA
The test can be run locally on Linux this way:
  make test-e2e-node FOCUS='\[NodeFeature:DynamicResourceAllocation\]' SKIP='\[Flaky\]' PARALLELISM=1 \
       TEST_ARGS='--feature-gates="DynamicResourceAllocation=true" --service-feature-gates="DynamicResourceAllocation=true" --runtime-config=api/all=true'
*/

package e2enode

import (
	"context"
	"os"
	"path"
	"path/filepath"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
	admissionapi "k8s.io/pod-security-admission/api"

	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"

	"k8s.io/dynamic-resource-allocation/kubeletplugin"
	testdriver "k8s.io/kubernetes/test/e2e/dra/test-driver/app"
)

const (
	driverName                = "test-driver.cdi.k8s.io"
	cdiDir                    = "/var/run/cdi"
	endpoint                  = "/var/lib/kubelet/plugins/test-driver/dra.sock"
	pluginRegistrationPath    = "/var/lib/kubelet/plugins_registry"
	draAddress                = "/var/lib/kubelet/plugins/test-driver/dra.sock"
	pluginRegistrationTimeout = time.Second * 60 // how long to wait for a node plugin to be registered
)

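// The socket paths in the const block above follow the usual kubelet plugin
// conventions: the registrar socket is created under pluginRegistrationPath,
// the directory the kubelet's plugin watcher scans to discover plugins, while
// endpoint/draAddress point at the gRPC socket the kubelet dials for DRA node
// operations once registration has succeeded.
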
var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][NodeFeature:DynamicResourceAllocation]", func() {
	f := framework.NewDefaultFramework("dra-node")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline

	var kubeletPlugin *testdriver.ExamplePlugin

	ginkgo.Context("Resource Kubelet Plugin [Serial]", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			kubeletPlugin = newKubeletPlugin(getNodeName(ctx, f))
		})

		ginkgo.It("must register after Kubelet restart", func(ctx context.Context) {
			oldCalls := kubeletPlugin.GetGRPCCalls()
			getNewCalls := func() []testdriver.GRPCCall {
				calls := kubeletPlugin.GetGRPCCalls()
				return calls[len(oldCalls):]
			}

			ginkgo.By("restarting Kubelet")
			restartKubelet(true)

			ginkgo.By("wait for Kubelet plugin re-registration")
			gomega.Eventually(getNewCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)
		})

		ginkgo.It("must register after plugin restart", func(ctx context.Context) {
			ginkgo.By("restart Kubelet Plugin")
			kubeletPlugin.Stop()
			kubeletPlugin = newKubeletPlugin(getNodeName(ctx, f))

			ginkgo.By("wait for Kubelet plugin re-registration")
			gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)
		})

		ginkgo.It("must process pod created when kubelet is not running", func(ctx context.Context) {
			// Stop Kubelet
			startKubelet := stopKubelet()
			pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod")
			// Pod must be in pending state
			err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) {
				return pod.Status.Phase == v1.PodPending, nil
			})
			framework.ExpectNoError(err)
			// Start Kubelet
			startKubelet()
			// Pod should succeed
			err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout)
			framework.ExpectNoError(err)
		})
	})
})

// Run Kubelet plugin and wait until it's registered
func newKubeletPlugin(nodeName string) *testdriver.ExamplePlugin {
	ginkgo.By("start Kubelet plugin")
	logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", nodeName)

	// Ensure that directories exist, creating them if necessary. We want
	// to know early if there is a setup problem that would prevent
	// creating those directories.
	err := os.MkdirAll(cdiDir, os.FileMode(0750))
	framework.ExpectNoError(err, "create CDI directory")
	err = os.MkdirAll(filepath.Dir(endpoint), 0750)
	framework.ExpectNoError(err, "create socket directory")

	plugin, err := testdriver.StartPlugin(
		logger,
		cdiDir,
		driverName,
		"",
		testdriver.FileOperations{},
		kubeletplugin.PluginSocketPath(endpoint),
		kubeletplugin.RegistrarSocketPath(path.Join(pluginRegistrationPath, driverName+"-reg.sock")),
		kubeletplugin.KubeletPluginSocketPath(draAddress),
	)
	framework.ExpectNoError(err)

	gomega.Eventually(plugin.GetGRPCCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)

	ginkgo.DeferCleanup(plugin.Stop)

	return plugin
}

// createTestObjects creates the objects required by the test.
// NOTE: since the scheduler and the controller manager are not running in the Node e2e,
// the objects must contain all data required for the API server to process them correctly
// and for the pod to land on the node without involving the scheduler and the DRA controller.
func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string) *v1.Pod {
	// ResourceClass
	class := &resourcev1alpha2.ResourceClass{
		ObjectMeta: metav1.ObjectMeta{
			Name: className,
		},
		DriverName: driverName,
	}
	_, err := clientSet.ResourceV1alpha2().ResourceClasses().Create(ctx, class, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	ginkgo.DeferCleanup(clientSet.ResourceV1alpha2().ResourceClasses().Delete, className, metav1.DeleteOptions{})

	// ResourceClaim
	podClaimName := "resource-claim"
	claim := &resourcev1alpha2.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name: claimName,
		},
		Spec: resourcev1alpha2.ResourceClaimSpec{
			ResourceClassName: className,
		},
	}
	createdClaim, err := clientSet.ResourceV1alpha2().ResourceClaims(namespace).Create(ctx, claim, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	ginkgo.DeferCleanup(clientSet.ResourceV1alpha2().ResourceClaims(namespace).Delete, claimName, metav1.DeleteOptions{})

	// Pod
	containerName := "testcontainer"
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			NodeName: nodename, // Assign the node as the scheduler is not running
			ResourceClaims: []v1.PodResourceClaim{
				{
					Name: podClaimName,
					Source: v1.ClaimSource{
						ResourceClaimName: &claimName,
					},
				},
			},
			Containers: []v1.Container{
				{
					Name:  containerName,
					Image: e2epod.GetDefaultTestImage(),
					Resources: v1.ResourceRequirements{
						Claims: []v1.ResourceClaim{{Name: podClaimName}},
					},
					Command: []string{"/bin/sh", "-c", "env | grep DRA_PARAM1=PARAM1_VALUE"},
				},
			},
			RestartPolicy: v1.RestartPolicyNever,
		},
	}
	createdPod, err := clientSet.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	ginkgo.DeferCleanup(clientSet.CoreV1().Pods(namespace).Delete, podName, metav1.DeleteOptions{})

	// Update claim status: set ReservedFor and AllocationResult
	// NOTE: This is usually done by the DRA controller
	createdClaim.Status = resourcev1alpha2.ResourceClaimStatus{
		DriverName: driverName,
		ReservedFor: []resourcev1alpha2.ResourceClaimConsumerReference{
			{Resource: "pods", Name: podName, UID: createdPod.UID},
		},
		Allocation: &resourcev1alpha2.AllocationResult{
			ResourceHandles: []resourcev1alpha2.ResourceHandle{
				{
					DriverName: driverName,
					Data:       "{\"EnvVars\":{\"DRA_PARAM1\":\"PARAM1_VALUE\"},\"NodeName\":\"\"}",
				},
			},
		},
	}
	_, err = clientSet.ResourceV1alpha2().ResourceClaims(namespace).UpdateStatus(ctx, createdClaim, metav1.UpdateOptions{})
	framework.ExpectNoError(err)

	return pod
}
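
The test container's command only finds DRA_PARAM1=PARAM1_VALUE in its environment if the kubelet plugin prepared the claim from the JSON embedded in the ResourceHandle above. A minimal sketch of the shape of that payload, assuming the example test driver decodes it into an EnvVars map and publishes those variables to the container via CDI (the handleData struct name here is illustrative, not taken from the driver):

package main

import (
	"encoding/json"
	"fmt"
)

// handleData mirrors the JSON stored in the claim's ResourceHandle Data field.
type handleData struct {
	EnvVars  map[string]string
	NodeName string
}

func main() {
	raw := `{"EnvVars":{"DRA_PARAM1":"PARAM1_VALUE"},"NodeName":""}`
	var d handleData
	if err := json.Unmarshal([]byte(raw), &d); err != nil {
		panic(err)
	}
	// The test container runs `env | grep DRA_PARAM1=PARAM1_VALUE`, so the pod
	// succeeds only if this key/value pair ends up in its environment.
	fmt.Printf("DRA_PARAM1=%s\n", d.EnvVars["DRA_PARAM1"])
}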