mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 12:43:23 +00:00
Fixes flakiness in the GPUDevicePlugin e2e test.
Waits until Nvidia GPUs disappear from all nodes after deleting the device plugin DaemonSet, to make sure its pods are deleted from all nodes.
This commit is contained in:
parent
ce4afa8418
commit
65b76f361e
@ -139,6 +139,24 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func areGPUsAvailableOnAnySchedulableNodes(f *framework.Framework) bool {
|
||||
framework.Logf("Getting list of Nodes from API server")
|
||||
nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
framework.ExpectNoError(err, "getting node list")
|
||||
for _, node := range nodeList.Items {
|
||||
if node.Spec.Unschedulable {
|
||||
continue
|
||||
}
|
||||
framework.Logf("gpuResourceName %s", gpuResourceName)
|
||||
if val, ok := node.Status.Capacity[gpuResourceName]; ok && val.Value() > 0 {
|
||||
framework.Logf("Nvidia GPUs available on Node: %q", node.Name)
|
||||
return true
|
||||
}
|
||||
}
|
||||
framework.Logf("Nvidia GPUs don't exist on all schedulable nodes")
|
||||
return false
|
||||
}
|
||||
|
||||
func getGPUsAvailable(f *framework.Framework) int64 {
|
||||
nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
framework.ExpectNoError(err, "getting node list")
|
||||
@ -220,10 +238,10 @@ var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
|
||||
err = f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
|
||||
framework.ExpectNoError(err, "failed to delete daemonset")
|
||||
framework.Logf("Successfully deleted device plugin daemonset. Wait for resource to be removed.")
|
||||
// Wait for Nvidia GPUs to be not available on nodes
|
||||
// Wait for Nvidia GPUs to be unavailable on all nodes.
|
||||
Eventually(func() bool {
|
||||
return !areGPUsAvailableOnAllSchedulableNodes(f)
|
||||
}, 5*time.Minute, time.Second).Should(BeTrue())
|
||||
return !areGPUsAvailableOnAnySchedulableNodes(f)
|
||||
}, 10*time.Minute, time.Second).Should(BeTrue())
|
||||
|
||||
// 3. Restarts the device plugin DaemonSet. Verifies GPU resource is successfully advertised
|
||||
// on the nodes and we can run pods using GPUs.
|
||||
|
Loading…
Reference in New Issue
Block a user