mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-12 05:21:58 +00:00
tests: Solve backoff tests flakiness
The container status is not constant, and can change over time in the following order: - Running: When kubelet reports the Pod as running. This state can be missed if the container finishes its command before kubelet gets to report this state. - Terminated: After the container finishes its command, it will enter the Terminated state, in which it will remain for a short period of time before kubelet tries to restart it. - Waiting: When kubelet has to wait for the backoff period to expire before actually restarting the container. Treating and handling each of these states when calculating the backoff period between container restarts makes the tests more reliable.
This commit is contained in:
parent
5716127cff
commit
e44961e47d
@ -109,6 +109,8 @@ func startPodAndGetBackOffs(podClient *framework.PodClient, pod *v1.Pod, sleepAm
|
||||
|
||||
func getRestartDelay(podClient *framework.PodClient, podName string, containerName string) (time.Duration, error) {
|
||||
beginTime := time.Now()
|
||||
var previousRestartCount int32 = -1
|
||||
var previousFinishedAt time.Time
|
||||
for time.Since(beginTime) < (2 * maxBackOffTolerance) { // may just miss the 1st MaxContainerBackOff delay
|
||||
time.Sleep(time.Second)
|
||||
pod, err := podClient.Get(podName, metav1.GetOptions{})
|
||||
@ -119,11 +121,37 @@ func getRestartDelay(podClient *framework.PodClient, podName string, containerNa
|
||||
continue
|
||||
}
|
||||
|
||||
if status.State.Waiting == nil && status.State.Terminated != nil && status.LastTerminationState.Terminated != nil && status.State.Terminated.StartedAt.Time.After(beginTime) {
|
||||
startedAt := status.State.Terminated.StartedAt.Time
|
||||
finishedAt := status.LastTerminationState.Terminated.FinishedAt.Time
|
||||
framework.Logf("getRestartDelay: restartCount = %d, finishedAt=%s restartedAt=%s (%s)", status.RestartCount, finishedAt, startedAt, startedAt.Sub(finishedAt))
|
||||
return startedAt.Sub(finishedAt), nil
|
||||
// the only case this happens is if this is the first time the Pod is running and there is no "Last State".
|
||||
if status.LastTerminationState.Terminated == nil {
|
||||
framework.Logf("Container's last state is not \"Terminated\".")
|
||||
continue
|
||||
}
|
||||
|
||||
if previousRestartCount == -1 {
|
||||
if status.State.Running != nil {
|
||||
// container is still Running, there is no "FinishedAt" time.
|
||||
continue
|
||||
} else if status.State.Terminated != nil {
|
||||
previousFinishedAt = status.State.Terminated.FinishedAt.Time
|
||||
} else {
|
||||
previousFinishedAt = status.LastTerminationState.Terminated.FinishedAt.Time
|
||||
}
|
||||
previousRestartCount = status.RestartCount
|
||||
}
|
||||
|
||||
// when the RestartCount is changed, the Containers will be in one of the following states:
|
||||
//Running, Terminated, Waiting (it already is waiting for the backoff period to expire, and the last state details have been stored into status.LastTerminationState).
|
||||
if status.RestartCount > previousRestartCount {
|
||||
var startedAt time.Time
|
||||
if status.State.Running != nil {
|
||||
startedAt = status.State.Running.StartedAt.Time
|
||||
} else if status.State.Terminated != nil {
|
||||
startedAt = status.State.Terminated.StartedAt.Time
|
||||
} else {
|
||||
startedAt = status.LastTerminationState.Terminated.StartedAt.Time
|
||||
}
|
||||
framework.Logf("getRestartDelay: restartCount = %d, finishedAt=%s restartedAt=%s (%s)", status.RestartCount, previousFinishedAt, startedAt, startedAt.Sub(previousFinishedAt))
|
||||
return startedAt.Sub(previousFinishedAt), nil
|
||||
}
|
||||
}
|
||||
return 0, fmt.Errorf("timeout getting pod restart delay")
|
||||
|
Loading…
Reference in New Issue
Block a user