Merge pull request #106544 from ehashman/fix-flake-restart
Deflake "Kubelet should correctly account for terminated pods after restart"
commit 21d3acc787
@@ -73,7 +73,7 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 podCount = 100
 podCreationInterval = 100 * time.Millisecond
 recoverTimeout = 5 * time.Minute
-startTimeout = 5 * time.Minute
+startTimeout = 3 * time.Minute
 // restartCount is chosen so even with minPods we exhaust the default
 // allocation of a /24.
 minPods = 50
@@ -150,6 +150,10 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 if numCpus < 1 {
 e2eskipper.Skipf("insufficient CPU available for kubelet restart test")
 }
+if numCpus > 18 {
+// 950m * 19 = 18.05 CPUs -> not enough to block the scheduling of another 950m pod
+e2eskipper.Skipf("test will return false positives on a machine with >18 cores")
+}
 
 // create as many restartNever pods as there are allocatable CPU
 // nodes; if they are not correctly accounted for as terminated
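The new >18-core guard rests on simple capacity arithmetic: each restartNever pod is capped at 950m, so 19 such pods only claim 18.05 CPU, and a node with 19 or more cores still has room to schedule one more 950m pod even when terminated pods are mis-accounted. A minimal, self-contained Go sketch of that math (the 1000m-per-core conversion, the two sample core counts, and the loop are illustrative assumptions, not part of the test):

package main

import "fmt"

func main() {
	const podCPUMilli = 950 // CPU limit per restartNever pod, taken from the diff above

	// Compare an 18-core node with a 19-core node, assuming 1000m of
	// allocatable CPU per core (node system reservations ignored here).
	for _, numCpus := range []int{18, 19} {
		allocatable := numCpus * 1000
		used := podCPUMilli * numCpus // one restartNever pod per allocatable CPU
		free := allocatable - used
		fmt.Printf("cores=%d used=%dm free=%dm extra 950m pod blocked: %v\n",
			numCpus, used, free, free < podCPUMilli)
	}
}

At 18 cores only 900m remains free, so an extra 950m pod cannot schedule unless the kubelet loses track of terminated pods; at 19 cores 950m remains free, so the test could pass even with broken accounting, hence the skip.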
@@ -161,7 +165,7 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 pod.Spec.RestartPolicy = "Never"
 pod.Spec.Containers[0].Command = []string{"echo", "hi"}
 pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
-v1.ResourceCPU: resource.MustParse("1"),
+v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads
 }
 }
 createBatchPodWithRateControl(f, restartNeverPods, podCreationInterval)
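Dropping the limit from a full CPU to 950m keeps the combined requests of the restartNever pods just under the node's core count, so system and daemon pods still fit. As a standalone illustration of the same ResourceList idiom with the upstream API types (the pod name, container name, and image are placeholders, not taken from the test):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "restart-never-example"},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{{
				Name:    "busybox",
				Image:   "busybox",
				Command: []string{"echo", "hi"},
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						// Just under a full core so other workloads still fit.
						v1.ResourceCPU: resource.MustParse("950m"),
					},
				},
			}},
		},
	}

	cpu := pod.Spec.Containers[0].Resources.Limits[v1.ResourceCPU]
	fmt.Println("cpu limit:", cpu.String()) // prints "cpu limit: 950m"
}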
@@ -199,10 +203,12 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 // restart may think these old pods are consuming CPU and we
 // will get an OutOfCpu error.
 ginkgo.By("verifying restartNever pods succeed and restartAlways pods stay running")
+for start := time.Now(); time.Since(start) < startTimeout; time.Sleep(10 * time.Second) {
 postRestartRunningPods := waitForPods(f, numAllPods, recoverTimeout)
 if len(postRestartRunningPods) < numAllPods {
 framework.Failf("less pods are running after node restart, got %d but expected %d", len(postRestartRunningPods), numAllPods)
 }
+}
 })
 })
 })
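The added for loop is the heart of the deflake: instead of sampling the pod count once after the kubelet restart, the test keeps re-validating it for the whole startTimeout window, so a late OutOfCpu rejection still fails the run. A minimal sketch of that poll-until-deadline pattern in isolation (the shortened timeouts and the countRunning stub are assumptions made for the example, not the test's helpers):

package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		startTimeout = 3 * time.Second        // the test keeps checking for 3 minutes
		pollInterval = 500 * time.Millisecond // the test sleeps 10 seconds between checks
		numAllPods   = 8
	)

	// countRunning is a hypothetical stand-in for waitForPods in the test.
	countRunning := func() int { return numAllPods }

	for start := time.Now(); time.Since(start) < startTimeout; time.Sleep(pollInterval) {
		if running := countRunning(); running < numAllPods {
			fmt.Printf("expected %d running pods, got %d\n", numAllPods, running)
			return // the e2e test calls framework.Failf here instead
		}
	}
	fmt.Println("pod count stayed stable for the full window")
}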
|