Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-18 08:09:58 +00:00
Merge pull request #124795 from atiratree/fix-daemon-max-surge-flake
e2e: DaemonSet maxSurge test should account for terminated pods that are terminated by the test
This commit is contained in: 51ad0bbb73
@@ -585,10 +585,12 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 		nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
 		framework.ExpectNoError(err)
 		nodeCount := len(nodes.Items)
-		retryTimeout := dsRetryTimeout + time.Duration(nodeCount*30)*time.Second
+		// We disturb daemonset progress by randomly terminating pods.
+		randomPodTerminationTimeout := 5 * time.Minute
+		retryTimeout := dsRetryTimeout + randomPodTerminationTimeout + time.Duration(nodeCount*30)*time.Second
 
 		ginkgo.By("Check that daemon pods surge and invariants are preserved during that rollout")
-		ageOfOldPod := make(map[string]time.Time)
+		nodeToAgeOfOldPod := make(map[string]map[string]time.Time)
 		deliberatelyDeletedPods := sets.NewString()
 		err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, retryTimeout, true, func(ctx context.Context) (bool, error) {
 			podList, err := c.CoreV1().Pods(ds.Namespace).List(ctx, metav1.ListOptions{})
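For orientation: the rollout is observed with a polling loop, and the new randomPodTerminationTimeout simply widens the overall deadline because the test now also disturbs the rollout by deleting pods at random. Below is a minimal, self-contained sketch of the same polling-with-budgeted-timeout pattern; the concrete values for dsRetryPeriod/dsRetryTimeout and the stand-in condition are assumptions for illustration, not taken from the commit.

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	// Illustrative values; the e2e test defines its own constants.
	dsRetryPeriod := 1 * time.Second
	dsRetryTimeout := 5 * time.Minute
	randomPodTerminationTimeout := 5 * time.Minute
	nodeCount := 3

	// Budget extra time for the random pod terminations plus 30s per node.
	retryTimeout := dsRetryTimeout + randomPodTerminationTimeout + time.Duration(nodeCount*30)*time.Second

	start := time.Now()
	err := wait.PollUntilContextTimeout(context.Background(), dsRetryPeriod, retryTimeout, true,
		func(ctx context.Context) (bool, error) {
			// Stand-in condition: done after a few seconds; the real test
			// checks rollout state and invariants here.
			return time.Since(start) > 3*time.Second, nil
		})
	fmt.Println("poll finished, err:", err)
}

With the fourth argument set to true, wait.PollUntilContextTimeout evaluates the condition immediately and then every dsRetryPeriod until it returns true, returns an error, or retryTimeout elapses.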
@@ -682,17 +684,25 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 				// if this is a pod in an older version AND there is a new version of this pod, record when
 				// we started seeing this, otherwise delete the record (perhaps the node was drained)
 				if nodesToVersions[pod.Spec.NodeName][newVersion] > 0 {
-					if _, ok := ageOfOldPod[string(pod.UID)]; !ok {
-						ageOfOldPod[string(pod.UID)] = now
+					if _, ok := nodeToAgeOfOldPod[pod.Spec.NodeName][string(pod.UID)]; !ok {
+						if _, ok := nodeToAgeOfOldPod[pod.Spec.NodeName]; !ok {
+							nodeToAgeOfOldPod[pod.Spec.NodeName] = make(map[string]time.Time)
+						}
+						nodeToAgeOfOldPod[pod.Spec.NodeName][string(pod.UID)] = now
 					}
 				} else {
-					delete(ageOfOldPod, string(pod.UID))
+					delete(nodeToAgeOfOldPod, pod.Spec.NodeName)
 				}
 			}
 			// purge the old pods list of any deleted pods
-			for uid := range ageOfOldPod {
-				if !podUIDs.Has(uid) {
-					delete(ageOfOldPod, uid)
+			for node, uidToTime := range nodeToAgeOfOldPod {
+				for uid := range uidToTime {
+					if !podUIDs.Has(uid) {
+						delete(uidToTime, uid)
+					}
+				}
+				if len(uidToTime) == 0 {
+					delete(nodeToAgeOfOldPod, node)
 				}
 			}
 			deliberatelyDeletedPods = deliberatelyDeletedPods.Intersection(deletedPodUIDs)
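The heart of the fix is replacing the flat ageOfOldPod map (keyed by pod UID) with nodeToAgeOfOldPod (keyed by node, then pod UID), so the bookkeeping can be reset for exactly one node when a pod there is deliberately terminated. A standalone sketch of that record-and-purge pattern follows; the type and helper names (oldPodAges, record, purge) and the sample UIDs are hypothetical, not part of the test.

package main

import (
	"fmt"
	"time"
)

// oldPodAges[node][podUID] records when an old-version pod was first seen
// coexisting with a new-version pod on that node.
type oldPodAges map[string]map[string]time.Time

// record notes the first time an old pod was observed next to a newer one,
// keeping the original timestamp on repeated observations.
func (a oldPodAges) record(node, uid string, now time.Time) {
	if _, ok := a[node][uid]; ok {
		return
	}
	if _, ok := a[node]; !ok {
		a[node] = make(map[string]time.Time)
	}
	a[node][uid] = now
}

// purge drops UIDs that no longer exist and removes node entries that end up empty.
func (a oldPodAges) purge(liveUIDs map[string]bool) {
	for node, uidToTime := range a {
		for uid := range uidToTime {
			if !liveUIDs[uid] {
				delete(uidToTime, uid)
			}
		}
		if len(uidToTime) == 0 {
			delete(a, node)
		}
	}
}

func main() {
	ages := oldPodAges{}
	now := time.Now()
	ages.record("node-1", "uid-a", now)
	ages.record("node-2", "uid-b", now)
	ages.purge(map[string]bool{"uid-a": true}) // uid-b is gone, so node-2 disappears
	fmt.Println(ages)
}

Deleting entries from a Go map while ranging over it is well defined, which is what lets the purge drop stale UIDs and then empty node entries in a single pass.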
@@ -713,9 +723,11 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 			}
 
 			// invariant: the controller must react to the new pod becoming ready within a reasonable timeframe (2x grace period)
-			for uid, firstSeen := range ageOfOldPod {
-				if now.Sub(firstSeen) > maxSurgeOverlap {
-					errs = append(errs, fmt.Sprintf("An old pod with UID %s has been running alongside a newer version for longer than %s", uid, maxSurgeOverlap))
+			for node, uidToTime := range nodeToAgeOfOldPod {
+				for uid, firstSeenSinceNewVersionPod := range uidToTime {
+					if now.Sub(firstSeenSinceNewVersionPod) > maxSurgeOverlap {
+						errs = append(errs, fmt.Sprintf("An old pod with UID %s on a node %s has been running alongside a newer version for longer than %s", uid, node, maxSurgeOverlap))
+					}
 				}
 			}
 
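The invariant itself is unchanged in spirit: no old-version pod may keep running alongside a newer version for more than maxSurgeOverlap (per the comment above, on the order of twice the grace period); only the iteration now walks the two-level map and the error message names the node. A hypothetical standalone restatement of that check, with made-up values, purely for readability:

package main

import (
	"fmt"
	"time"
)

// checkSurgeOverlap returns one error string per old pod that has been
// running alongside a newer version for longer than maxSurgeOverlap.
func checkSurgeOverlap(nodeToAgeOfOldPod map[string]map[string]time.Time, now time.Time, maxSurgeOverlap time.Duration) []string {
	var errs []string
	for node, uidToTime := range nodeToAgeOfOldPod {
		for uid, firstSeen := range uidToTime {
			if now.Sub(firstSeen) > maxSurgeOverlap {
				errs = append(errs, fmt.Sprintf("An old pod with UID %s on a node %s has been running alongside a newer version for longer than %s", uid, node, maxSurgeOverlap))
			}
		}
	}
	return errs
}

func main() {
	now := time.Now()
	ages := map[string]map[string]time.Time{
		"node-1": {"uid-a": now.Add(-3 * time.Minute)},
	}
	// Illustrative threshold only; the test ties maxSurgeOverlap to the pod grace period.
	fmt.Println(checkSurgeOverlap(ages, now, 2*time.Minute))
}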
@@ -800,6 +812,9 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 				} else {
 					framework.Logf("Deleted pod %s prematurely", pod.Name)
 					deliberatelyDeletedPods.Insert(string(pod.UID))
+					// If it is an old version we do not need to measure the controller reaction because we have done it instead.
+					// If it is a new version, we have to reset the time to start counting the time for the replacement pod to reach readiness again.
+					delete(nodeToAgeOfOldPod, pod.Spec.NodeName)
 				}
 			}
 		}
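Finally, deliberatelyDeletedPods is a string set from k8s.io/apimachinery/pkg/util/sets: UIDs of pods the test kills on purpose are inserted here, and the Intersection call in the earlier hunk prunes UIDs that are no longer reported among deleted pods. A brief sketch of that API with made-up UIDs (not from the commit):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// Pods the test terminated on purpose.
	deliberatelyDeleted := sets.NewString("uid-a", "uid-b")

	// Pods currently observed as deleted/terminating in the cluster.
	deletedPodUIDs := sets.NewString("uid-b", "uid-c")

	// Keep only UIDs still reported as deleted; "uid-a" is dropped from tracking.
	deliberatelyDeleted = deliberatelyDeleted.Intersection(deletedPodUIDs)

	fmt.Println(deliberatelyDeleted.List()) // [uid-b]
}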