kubelet: Never restart containers in deleting pods

When constructing the API status of a pod, if the pod is marked for
deletion, no containers should be started. Previously, if a container
inside a terminating pod failed to start due to a container runtime
error (which populates the reasonCache), the entry would remain in
the cache (it is only updated by syncPod for non-terminating pods)
and the delete action on the pod would be delayed until the entry
was evicted by activity from other pods.

This dramatically reduces the time the Kubelet waits before deleting
terminating pods that encountered a container runtime error.
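
For context, the lingering entry behaves like a fixed-capacity LRU. Below is a minimal, self-contained sketch of that failure mode; it is a toy stand-in, not the kubelet's actual reasonCache implementation, and the key format and capacity are illustrative only:

```go
package main

import (
	"container/list"
	"fmt"
)

// reasonLRU is a toy fixed-capacity cache standing in for the kubelet's
// reasonCache: an entry is only dropped when newer entries evict it.
type reasonLRU struct {
	capacity int
	order    *list.List               // front = most recently added
	items    map[string]*list.Element // key -> element storing the key
}

func newReasonLRU(capacity int) *reasonLRU {
	return &reasonLRU{
		capacity: capacity,
		order:    list.New(),
		items:    make(map[string]*list.Element),
	}
}

func (c *reasonLRU) add(key string) {
	if el, ok := c.items[key]; ok {
		c.order.MoveToFront(el)
		return
	}
	c.items[key] = c.order.PushFront(key)
	if c.order.Len() > c.capacity {
		oldest := c.order.Back()
		c.order.Remove(oldest)
		delete(c.items, oldest.Value.(string))
	}
}

func (c *reasonLRU) has(key string) bool {
	_, ok := c.items[key]
	return ok
}

func main() {
	cache := newReasonLRU(2)
	cache.add("terminating-pod/app") // start failure recorded for a terminating pod

	// syncPod skips terminating pods, so nothing clears the entry; it only
	// disappears once churn from other pods evicts it.
	cache.add("pod-b/app")
	cache.add("pod-c/app")
	fmt.Println(cache.has("terminating-pod/app")) // false: evicted only now
}
```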
Clayton Coleman 2020-02-24 19:34:49 -05:00
parent 2364c10e2e
commit 8722c834e5
2 changed files with 28 additions and 0 deletions

pkg/kubelet/container/helpers.go

@@ -61,6 +61,10 @@ type RuntimeHelper interface {
 // ShouldContainerBeRestarted checks whether a container needs to be restarted.
 // TODO(yifan): Think about how to refactor this.
 func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
+	// Once a pod has been marked deleted, it should not be restarted
+	if pod.DeletionTimestamp != nil {
+		return false
+	}
 	// Get latest container status.
 	status := podStatus.FindContainerStatusByName(container.Name)
 	// If the container was never started before, we should start it.
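
The guard runs before the restart-policy checks later in this function, so the deletion check wins unconditionally. A hypothetical caller-side sketch (assuming the conventional kubecontainer import alias and an already-populated *PodStatus; the helper name is illustrative):

```go
import (
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// neverRestartWhenDeleting is a hypothetical helper showing the new
// short-circuit: once a pod carries a DeletionTimestamp, the answer is
// false for every container, even under RestartPolicyAlways.
func neverRestartWhenDeleting(pod *v1.Pod, status *kubecontainer.PodStatus) bool {
	pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
	pod.Spec.RestartPolicy = v1.RestartPolicyAlways
	// Always false after this commit, regardless of restart policy.
	return kubecontainer.ShouldContainerBeRestarted(&pod.Spec.Containers[0], pod, status)
}
```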

pkg/kubelet/container/helpers_test.go

@@ -19,6 +19,7 @@ package container
 import (
 	"reflect"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
@@ -449,6 +450,8 @@ func TestShouldContainerBeRestarted(t *testing.T) {
 		v1.RestartPolicyOnFailure,
 		v1.RestartPolicyAlways,
 	}
+
+	// test policies
 	expected := map[string][]bool{
 		"no-history": {true, true, true},
 		"alive":      {false, false, false},
@@ -467,6 +470,27 @@ func TestShouldContainerBeRestarted(t *testing.T) {
 			}
 		}
 	}
+
+	// test deleted pod
+	pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
+	expected = map[string][]bool{
+		"no-history": {false, false, false},
+		"alive":      {false, false, false},
+		"succeed":    {false, false, false},
+		"failed":     {false, false, false},
+		"unknown":    {false, false, false},
+	}
+	for _, c := range pod.Spec.Containers {
+		for i, policy := range policies {
+			pod.Spec.RestartPolicy = policy
+			e := expected[c.Name][i]
+			r := ShouldContainerBeRestarted(&c, pod, podStatus)
+			if r != e {
+				t.Errorf("Restart for container %q with restart policy %q expected %t, got %t",
+					c.Name, policy, e, r)
+			}
+		}
+	}
 }
 
 func TestHasPrivilegedContainer(t *testing.T) {
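
These table-driven cases run with the package's standard test invocation, e.g. go test ./pkg/kubelet/container/ -run TestShouldContainerBeRestarted.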