mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #32213 from derekwaynecarr/log-an-event-when-eviction-cannot-kill-fast-enough
Automatic merge from submit-queue. Log an event when container runtime exceeds grace-period during eviction. While debugging flakes in eviction, I encountered scenarios where the container runtime did not evict a pod within the allowed grace period. This could result in situations where a BestEffort (BE) pod would not get killed fast enough, and therefore a Burstable (Bu) pod was killed next (assuming there were no other BE pods). /cc @mtaufen @vishh
This commit is contained in:
commit
4c9ab4e856
@ -24,6 +24,7 @@ const (
|
||||
FailedToStartContainer = "Failed"
|
||||
KillingContainer = "Killing"
|
||||
BackOffStartContainer = "BackOff"
|
||||
ExceededGracePeriod = "ExceededGracePeriod"
|
||||
|
||||
// Image event reason list
|
||||
PullingImage = "Pulling"
|
||||
|
@ -701,7 +701,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
|
||||
klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()
|
||||
|
||||
// setup eviction manager
|
||||
evictionManager, evictionAdmitHandler, err := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers), klet.imageManager, kubeDeps.Recorder, nodeRef, klet.clock)
|
||||
evictionManager, evictionAdmitHandler, err := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.imageManager, kubeDeps.Recorder, nodeRef, klet.clock)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize eviction manager: %v", err)
|
||||
|
@ -226,7 +226,7 @@ func newTestKubeletWithImageList(
|
||||
Namespace: "",
|
||||
}
|
||||
// setup eviction manager
|
||||
evictionManager, evictionAdmitHandler, err := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers), kubelet.imageManager, fakeRecorder, nodeRef, kubelet.clock)
|
||||
evictionManager, evictionAdmitHandler, err := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.imageManager, fakeRecorder, nodeRef, kubelet.clock)
|
||||
require.NoError(t, err, "Failed to initialize eviction manager")
|
||||
|
||||
kubelet.evictionManager = evictionManager
|
||||
|
@ -282,7 +282,7 @@ func (p *podWorkers) checkForUpdates(uid types.UID) {
|
||||
|
||||
// killPodNow returns a KillPodFunc that can be used to kill a pod.
|
||||
// It is intended to be injected into other modules that need to kill a pod.
|
||||
func killPodNow(podWorkers PodWorkers) eviction.KillPodFunc {
|
||||
func killPodNow(podWorkers PodWorkers, recorder record.EventRecorder) eviction.KillPodFunc {
|
||||
return func(pod *api.Pod, status api.PodStatus, gracePeriodOverride *int64) error {
|
||||
// determine the grace period to use when killing the pod
|
||||
gracePeriod := int64(0)
|
||||
@ -325,6 +325,7 @@ func killPodNow(podWorkers PodWorkers) eviction.KillPodFunc {
|
||||
case r := <-ch:
|
||||
return r.err
|
||||
case <-time.After(timeoutDuration):
|
||||
recorder.Eventf(pod, api.EventTypeWarning, events.ExceededGracePeriod, "Container runtime did not kill the pod within specified grace period.")
|
||||
return fmt.Errorf("timeout waiting to kill pod")
|
||||
}
|
||||
}
|
||||
|
@ -331,8 +331,9 @@ func TestFakePodWorkers(t *testing.T) {
|
||||
|
||||
// TestKillPodNowFunc tests the blocking kill pod function works with pod workers as expected.
|
||||
func TestKillPodNowFunc(t *testing.T) {
|
||||
fakeRecorder := &record.FakeRecorder{}
|
||||
podWorkers, processed := createPodWorkers()
|
||||
killPodFunc := killPodNow(podWorkers)
|
||||
killPodFunc := killPodNow(podWorkers, fakeRecorder)
|
||||
pod := newPod("test", "test")
|
||||
gracePeriodOverride := int64(0)
|
||||
err := killPodFunc(pod, api.PodStatus{Phase: api.PodFailed, Reason: "reason", Message: "message"}, &gracePeriodOverride)
|
||||
|
Loading…
Reference in New Issue
Block a user