mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-13 22:05:59 +00:00
Adjust GPU test to work with latest nvidia daemonset
Signed-off-by: Davanum Srinivas <davanum@gmail.com>
This commit is contained in:
parent
546f7c3086
commit
30857658e4
@@ -62,13 +62,7 @@ var (
|
|||||||
|
|
||||||
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
|
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
|
||||||
podName := testPodNamePrefix + string(uuid.NewUUID())
|
podName := testPodNamePrefix + string(uuid.NewUUID())
|
||||||
testPod := &v1.Pod{
|
testContainers := []v1.Container{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
|
||||||
Name: podName,
|
|
||||||
},
|
|
||||||
Spec: v1.PodSpec{
|
|
||||||
RestartPolicy: v1.RestartPolicyNever,
|
|
||||||
Containers: []v1.Container{
|
|
||||||
{
|
{
|
||||||
Name: "vector-addition-cuda8",
|
Name: "vector-addition-cuda8",
|
||||||
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
|
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
|
||||||
@@ -87,9 +81,21 @@ func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
}
|
||||||
|
testPod := &v1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: podName,
|
||||||
},
|
},
|
||||||
|
Spec: v1.PodSpec{
|
||||||
|
RestartPolicy: v1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
testPod.Spec.Containers = testContainers
|
||||||
|
if os.Getenv("TEST_MAX_GPU_COUNT") == "1" {
|
||||||
|
testPod.Spec.Containers = []v1.Container{testContainers[0]}
|
||||||
|
}
|
||||||
|
framework.Logf("testPod.Spec.Containers {%#v}", testPod.Spec.Containers)
|
||||||
return testPod
|
return testPod
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,6 +115,10 @@ func areGPUsAvailableOnAllSchedulableNodes(ctx context.Context, f *framework.Fra
|
|||||||
if node.Spec.Unschedulable {
|
if node.Spec.Unschedulable {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
_, isControlPlane := node.Labels["node-role.kubernetes.io/control-plane"]
|
||||||
|
if isControlPlane {
|
||||||
|
continue
|
||||||
|
}
|
||||||
framework.Logf("gpuResourceName %s", gpuResourceName)
|
framework.Logf("gpuResourceName %s", gpuResourceName)
|
||||||
if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 {
|
if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 {
|
||||||
framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
|
framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
|
||||||
@@ -137,12 +147,14 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework, setupResour
|
|||||||
|
|
||||||
var err error
|
var err error
|
||||||
var ds *appsv1.DaemonSet
|
var ds *appsv1.DaemonSet
|
||||||
|
dsNamespace := f.Namespace.Name
|
||||||
dsYamlURLFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
|
dsYamlURLFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
|
||||||
if dsYamlURLFromEnv != "" {
|
if dsYamlURLFromEnv != "" {
|
||||||
// Using DaemonSet from remote URL
|
// Using DaemonSet from remote URL
|
||||||
framework.Logf("Using remote nvidia-driver-installer daemonset manifest from %v", dsYamlURLFromEnv)
|
framework.Logf("Using remote nvidia-driver-installer daemonset manifest from %v", dsYamlURLFromEnv)
|
||||||
ds, err = e2emanifest.DaemonSetFromURL(ctx, dsYamlURLFromEnv)
|
ds, err = e2emanifest.DaemonSetFromURL(ctx, dsYamlURLFromEnv)
|
||||||
framework.ExpectNoError(err, "failed get remote")
|
framework.ExpectNoError(err, "failed get remote")
|
||||||
|
dsNamespace = ds.Namespace
|
||||||
} else {
|
} else {
|
||||||
// Using default local DaemonSet
|
// Using default local DaemonSet
|
||||||
framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
|
framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
|
||||||
@@ -152,12 +164,11 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework, setupResour
|
|||||||
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
|
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
|
||||||
}
|
}
|
||||||
gpuResourceName = e2egpu.NVIDIAGPUResourceName
|
gpuResourceName = e2egpu.NVIDIAGPUResourceName
|
||||||
ds.Namespace = f.Namespace.Name
|
_, err = f.ClientSet.AppsV1().DaemonSets(dsNamespace).Create(ctx, ds, metav1.CreateOptions{})
|
||||||
_, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, ds, metav1.CreateOptions{})
|
|
||||||
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
|
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
|
||||||
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
|
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
|
||||||
|
|
||||||
pods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, ds.Namespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
|
pods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, dsNamespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
|
||||||
framework.ExpectNoError(err, "failed to get pods controlled by the nvidia-driver-installer daemonset")
|
framework.ExpectNoError(err, "failed to get pods controlled by the nvidia-driver-installer daemonset")
|
||||||
|
|
||||||
devicepluginPods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", extensionsinternal.Kind("DaemonSet"))
|
devicepluginPods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", extensionsinternal.Kind("DaemonSet"))
|
||||||
|
Loading…
Reference in New Issue
Block a user