Merge pull request #53336 from jiayingz/e2e-flaky
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fixes test/e2e_node/gpu_device_plugin.go test failure.

**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*:
fixes https://github.com/kubernetes/kubernetes/issues/53354

**Special notes for your reviewer**:

**Release note**:

```release-note
```
Commit: 762d1e42dc
```diff
--- a/test/e2e_node/gpu_device_plugin.go
+++ b/test/e2e_node/gpu_device_plugin.go
@@ -18,6 +18,7 @@ package e2e_node
 
 import (
 	"os/exec"
+	"path/filepath"
 	"regexp"
 	"time"
 
@@ -59,7 +60,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugin]
 		By("Waiting for GPUs to become available on the local node")
 		Eventually(func() bool {
 			return framework.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
-		}, 10*time.Second, time.Second).Should(BeTrue())
+		}, 10*time.Second, framework.Poll).Should(BeTrue())
 
 		if framework.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
 			Skip("Not enough GPUs to execute this test (at least two needed)")
```
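The only functional change in the second hunk is the polling interval: the hard-coded `time.Second` becomes the framework's shared `framework.Poll` interval, so this wait uses the same cadence as the rest of the e2e suite. For reference, a minimal standalone sketch of the same Gomega `Eventually(pollFunc, timeout, interval)` pattern; the `countGPUs` helper here is hypothetical, standing in for `framework.NumberOfNVIDIAGPUs(getLocalNode(f))`:

```go
package gpupoll

import (
	"time"

	. "github.com/onsi/gomega"
)

// countGPUs is a hypothetical stand-in for
// framework.NumberOfNVIDIAGPUs(getLocalNode(f)) in the real test.
func countGPUs() int64 { return 0 }

// waitForGPUs polls countGPUs at the given interval until at least one GPU is
// reported or the timeout expires, mirroring the
// Eventually(...).Should(BeTrue()) call changed in the hunk above.
func waitForGPUs(timeout, poll time.Duration) {
	Eventually(func() bool {
		return countGPUs() > 0
	}, timeout, poll).Should(BeTrue())
}
```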
```diff
@@ -138,6 +139,9 @@ func newDecimalResourceList(name v1.ResourceName, quantity int64) v1.ResourceList
 
 // TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
 func restartKubelet(f *framework.Framework) {
+	beforeSocks, err := filepath.Glob("/var/lib/kubelet/device-plugins/nvidiaGPU*.sock")
+	framework.ExpectNoError(err)
+	Expect(len(beforeSocks)).NotTo(BeZero())
 	stdout, err := exec.Command("sudo", "systemctl", "list-units", "kubelet*", "--state=running").CombinedOutput()
 	framework.ExpectNoError(err)
 	regex := regexp.MustCompile("(kubelet-[0-9]+)")
@@ -146,19 +150,21 @@ func restartKubelet(f *framework.Framework) {
 	kube := matches[0]
 	framework.Logf("Get running kubelet with systemctl: %v, %v", string(stdout), kube)
 	stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
-	if err == nil {
-		return
-	}
-	framework.Failf("Failed to restart kubelet with systemctl: %v, %v", err, stdout)
+	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
+	Eventually(func() ([]string, error) {
+		return filepath.Glob("/var/lib/kubelet/device-plugins/nvidiaGPU*.sock")
+	}, 5*time.Minute, framework.Poll).ShouldNot(ConsistOf(beforeSocks))
 }
 
 func getDeviceId(f *framework.Framework, podName string, contName string, restartCount int32) string {
 	// Wait till pod has been restarted at least restartCount times.
 	Eventually(func() bool {
 		p, err := f.PodClient().Get(podName, metav1.GetOptions{})
-		framework.ExpectNoError(err)
+		if err != nil || len(p.Status.ContainerStatuses) < 1 {
+			return false
+		}
 		return p.Status.ContainerStatuses[0].RestartCount >= restartCount
-	}, time.Minute, time.Second).Should(BeTrue())
+	}, 5*time.Minute, framework.Poll).Should(BeTrue())
 	logs, err := framework.GetPodLogs(f.ClientSet, f.Namespace.Name, podName, contName)
 	if err != nil {
 		framework.Failf("GetPodLogs for pod %q failed: %v", podName, err)
```
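The main change in `restartKubelet` is that instead of returning as soon as `systemctl restart` succeeds, the test records the NVIDIA device-plugin socket paths before the restart and then polls until the glob result no longer matches that set, i.e. until the restarted kubelet has recreated the sockets. A self-contained sketch of that wait with plain Gomega, assuming the same socket path as the diff; the five-minute timeout and two-second poll here merely stand in for the framework's values:

```go
package gpupoll

import (
	"path/filepath"
	"time"

	. "github.com/onsi/gomega"
)

// waitForNewPluginSockets polls the kubelet device-plugin directory until the
// set of NVIDIA GPU plugin sockets differs from beforeSocks, signalling that
// the restarted kubelet has re-created them.
func waitForNewPluginSockets(beforeSocks []string) {
	Eventually(func() ([]string, error) {
		// Re-glob the plugin sockets on every poll.
		return filepath.Glob("/var/lib/kubelet/device-plugins/nvidiaGPU*.sock")
	}, 5*time.Minute, 2*time.Second).ShouldNot(ConsistOf(beforeSocks))
}
```

Gomega's `ConsistOf` expands a single slice argument into its elements, so the assertion keeps polling until the glob returns a different set of paths. The `getDeviceId` hunk applies the same idea to pod polling: a transient `Get` error or a missing container status now just counts as "not ready yet" instead of failing the assertion outright, and the wait gets the longer five-minute timeout.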