mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-13 22:05:59 +00:00
Merge pull request #123776 from dims/adjust-gpu-test-to-work-with-latest-nvidia-daemonset
Adjust GPU test to work with latest nvidia daemonset on AWS/ec2
This commit is contained in:
commit
bf7fcfdc7f
@ -62,34 +62,40 @@ var (
|
|||||||
|
|
||||||
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
|
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
|
||||||
podName := testPodNamePrefix + string(uuid.NewUUID())
|
podName := testPodNamePrefix + string(uuid.NewUUID())
|
||||||
|
testContainers := []v1.Container{
|
||||||
|
{
|
||||||
|
Name: "vector-addition-cuda8",
|
||||||
|
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
|
||||||
|
Resources: v1.ResourceRequirements{
|
||||||
|
Limits: v1.ResourceList{
|
||||||
|
gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "vector-addition-cuda10",
|
||||||
|
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd2),
|
||||||
|
Resources: v1.ResourceRequirements{
|
||||||
|
Limits: v1.ResourceList{
|
||||||
|
gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
testPod := &v1.Pod{
|
testPod := &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Name: podName,
|
Name: podName,
|
||||||
},
|
},
|
||||||
Spec: v1.PodSpec{
|
Spec: v1.PodSpec{
|
||||||
RestartPolicy: v1.RestartPolicyNever,
|
RestartPolicy: v1.RestartPolicyNever,
|
||||||
Containers: []v1.Container{
|
|
||||||
{
|
|
||||||
Name: "vector-addition-cuda8",
|
|
||||||
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
|
|
||||||
Resources: v1.ResourceRequirements{
|
|
||||||
Limits: v1.ResourceList{
|
|
||||||
gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "vector-addition-cuda10",
|
|
||||||
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd2),
|
|
||||||
Resources: v1.ResourceRequirements{
|
|
||||||
Limits: v1.ResourceList{
|
|
||||||
gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
testPod.Spec.Containers = testContainers
|
||||||
|
if os.Getenv("TEST_MAX_GPU_COUNT") == "1" {
|
||||||
|
testPod.Spec.Containers = []v1.Container{testContainers[0]}
|
||||||
|
}
|
||||||
|
framework.Logf("testPod.Spec.Containers {%#v}", testPod.Spec.Containers)
|
||||||
return testPod
|
return testPod
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,6 +115,10 @@ func areGPUsAvailableOnAllSchedulableNodes(ctx context.Context, f *framework.Fra
|
|||||||
if node.Spec.Unschedulable {
|
if node.Spec.Unschedulable {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
_, isControlPlane := node.Labels["node-role.kubernetes.io/control-plane"]
|
||||||
|
if isControlPlane {
|
||||||
|
continue
|
||||||
|
}
|
||||||
framework.Logf("gpuResourceName %s", gpuResourceName)
|
framework.Logf("gpuResourceName %s", gpuResourceName)
|
||||||
if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 {
|
if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 {
|
||||||
framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
|
framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
|
||||||
@ -137,12 +147,14 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework, setupResour
|
|||||||
|
|
||||||
var err error
|
var err error
|
||||||
var ds *appsv1.DaemonSet
|
var ds *appsv1.DaemonSet
|
||||||
|
dsNamespace := f.Namespace.Name
|
||||||
dsYamlURLFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
|
dsYamlURLFromEnv := os.Getenv("NVIDIA_DRIVER_INSTALLER_DAEMONSET")
|
||||||
if dsYamlURLFromEnv != "" {
|
if dsYamlURLFromEnv != "" {
|
||||||
// Using DaemonSet from remote URL
|
// Using DaemonSet from remote URL
|
||||||
framework.Logf("Using remote nvidia-driver-installer daemonset manifest from %v", dsYamlURLFromEnv)
|
framework.Logf("Using remote nvidia-driver-installer daemonset manifest from %v", dsYamlURLFromEnv)
|
||||||
ds, err = e2emanifest.DaemonSetFromURL(ctx, dsYamlURLFromEnv)
|
ds, err = e2emanifest.DaemonSetFromURL(ctx, dsYamlURLFromEnv)
|
||||||
framework.ExpectNoError(err, "failed get remote")
|
framework.ExpectNoError(err, "failed get remote")
|
||||||
|
dsNamespace = ds.Namespace
|
||||||
} else {
|
} else {
|
||||||
// Using default local DaemonSet
|
// Using default local DaemonSet
|
||||||
framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
|
framework.Logf("Using default local nvidia-driver-installer daemonset manifest.")
|
||||||
@ -152,12 +164,11 @@ func SetupNVIDIAGPUNode(ctx context.Context, f *framework.Framework, setupResour
|
|||||||
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
|
framework.ExpectNoError(err, "failed to parse local manifest for nvidia-driver-installer daemonset")
|
||||||
}
|
}
|
||||||
gpuResourceName = e2egpu.NVIDIAGPUResourceName
|
gpuResourceName = e2egpu.NVIDIAGPUResourceName
|
||||||
ds.Namespace = f.Namespace.Name
|
_, err = f.ClientSet.AppsV1().DaemonSets(dsNamespace).Create(ctx, ds, metav1.CreateOptions{})
|
||||||
_, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, ds, metav1.CreateOptions{})
|
|
||||||
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
|
framework.ExpectNoError(err, "failed to create nvidia-driver-installer daemonset")
|
||||||
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
|
framework.Logf("Successfully created daemonset to install Nvidia drivers.")
|
||||||
|
|
||||||
pods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, ds.Namespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
|
pods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, dsNamespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
|
||||||
framework.ExpectNoError(err, "failed to get pods controlled by the nvidia-driver-installer daemonset")
|
framework.ExpectNoError(err, "failed to get pods controlled by the nvidia-driver-installer daemonset")
|
||||||
|
|
||||||
devicepluginPods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", extensionsinternal.Kind("DaemonSet"))
|
devicepluginPods, err := e2eresource.WaitForControlledPods(ctx, f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", extensionsinternal.Kind("DaemonSet"))
|
||||||
|
Loading…
Reference in New Issue
Block a user