Merge pull request #98005 from wzshiming/fix-rescheduling-to-the-shutdown-node

Sync node status during kubelet node shutdown
This commit is contained in:
Kubernetes Prow Robot 2021-01-28 17:51:53 -08:00 committed by GitHub
commit 9ec1e23e41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 60 additions and 10 deletions

View File

@ -822,7 +822,11 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
v1.NamespaceNodeLease, v1.NamespaceNodeLease,
util.SetNodeOwnerFunc(klet.heartbeatClient, string(klet.nodeName))) util.SetNodeOwnerFunc(klet.heartbeatClient, string(klet.nodeName)))
klet.shutdownManager = nodeshutdown.NewManager(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeCfg.ShutdownGracePeriod.Duration, kubeCfg.ShutdownGracePeriodCriticalPods.Duration) // setup node shutdown manager
shutdownManager, shutdownAdmitHandler := nodeshutdown.NewManager(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.syncNodeStatus, kubeCfg.ShutdownGracePeriod.Duration, kubeCfg.ShutdownGracePeriodCriticalPods.Duration)
klet.shutdownManager = shutdownManager
klet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler)
// Finally, put the most recent version of the config on the Kubelet, so // Finally, put the most recent version of the config on the Kubelet, so
// people can see how it was configured. // people can see how it was configured.

View File

@ -11,10 +11,12 @@ go_library(
deps = select({ deps = select({
"@io_bazel_rules_go//go/platform:aix": [ "@io_bazel_rules_go//go/platform:aix": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:android": [ "@io_bazel_rules_go//go/platform:android": [
"//pkg/features:go_default_library", "//pkg/features:go_default_library",
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/nodeshutdown/systemd:go_default_library", "//pkg/kubelet/nodeshutdown/systemd:go_default_library",
"//pkg/kubelet/types:go_default_library", "//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library", "//pkg/kubelet/util/format:go_default_library",
@ -26,25 +28,32 @@ go_library(
], ],
"@io_bazel_rules_go//go/platform:darwin": [ "@io_bazel_rules_go//go/platform:darwin": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:dragonfly": [ "@io_bazel_rules_go//go/platform:dragonfly": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:freebsd": [ "@io_bazel_rules_go//go/platform:freebsd": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:illumos": [ "@io_bazel_rules_go//go/platform:illumos": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:ios": [ "@io_bazel_rules_go//go/platform:ios": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:js": [ "@io_bazel_rules_go//go/platform:js": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:linux": [ "@io_bazel_rules_go//go/platform:linux": [
"//pkg/features:go_default_library", "//pkg/features:go_default_library",
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/nodeshutdown/systemd:go_default_library", "//pkg/kubelet/nodeshutdown/systemd:go_default_library",
"//pkg/kubelet/types:go_default_library", "//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library", "//pkg/kubelet/util/format:go_default_library",
@ -56,21 +65,27 @@ go_library(
], ],
"@io_bazel_rules_go//go/platform:nacl": [ "@io_bazel_rules_go//go/platform:nacl": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:netbsd": [ "@io_bazel_rules_go//go/platform:netbsd": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:openbsd": [ "@io_bazel_rules_go//go/platform:openbsd": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:plan9": [ "@io_bazel_rules_go//go/platform:plan9": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:solaris": [ "@io_bazel_rules_go//go/platform:solaris": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"@io_bazel_rules_go//go/platform:windows": [ "@io_bazel_rules_go//go/platform:windows": [
"//pkg/kubelet/eviction:go_default_library", "//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
], ],
"//conditions:default": [], "//conditions:default": [],
}), }),

View File

@ -31,14 +31,16 @@ import (
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/eviction" "k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd" "k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types" kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/kubelet/util/format"
) )
const ( const (
nodeShutdownReason = "Shutdown" nodeShutdownReason = "Shutdown"
nodeShutdownMessage = "Node is shutting, evicting pods" nodeShutdownMessage = "Node is shutting, evicting pods"
nodeShutdownNotAdmitMessage = "Node is in progress of shutting down, not admitting any new pods"
) )
var systemDbus = func() (dbusInhibiter, error) { var systemDbus = func() (dbusInhibiter, error) {
@ -63,8 +65,9 @@ type Manager struct {
shutdownGracePeriodRequested time.Duration shutdownGracePeriodRequested time.Duration
shutdownGracePeriodCriticalPods time.Duration shutdownGracePeriodCriticalPods time.Duration
getPods eviction.ActivePodsFunc getPods eviction.ActivePodsFunc
killPod eviction.KillPodFunc killPod eviction.KillPodFunc
syncNodeStatus func()
dbusCon dbusInhibiter dbusCon dbusInhibiter
inhibitLock systemd.InhibitLock inhibitLock systemd.InhibitLock
@ -76,14 +79,30 @@ type Manager struct {
} }
// NewManager returns a new node shutdown manager. // NewManager returns a new node shutdown manager.
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) *Manager { func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) {
return &Manager{ manager := &Manager{
getPods: getPodsFunc, getPods: getPodsFunc,
killPod: killPodFunc, killPod: killPodFunc,
syncNodeStatus: syncNodeStatus,
shutdownGracePeriodRequested: shutdownGracePeriodRequested, shutdownGracePeriodRequested: shutdownGracePeriodRequested,
shutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods, shutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods,
clock: clock.RealClock{}, clock: clock.RealClock{},
} }
return manager, manager
}
// Admit rejects all pods if the node is shutting down.
func (m *Manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
nodeShuttingDown := m.ShutdownStatus() != nil
if nodeShuttingDown {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: nodeShutdownReason,
Message: nodeShutdownNotAdmitMessage,
}
}
return lifecycle.PodAdmitResult{Admit: true}
} }
// Start starts the node shutdown manager and will start watching the node for shutdown events. // Start starts the node shutdown manager and will start watching the node for shutdown events.
@ -158,6 +177,9 @@ func (m *Manager) Start() error {
m.nodeShuttingDownMutex.Unlock() m.nodeShuttingDownMutex.Unlock()
if isShuttingDown { if isShuttingDown {
// Update node status and ready condition
go m.syncNodeStatus()
m.processShutdownEvent() m.processShutdownEvent()
} else { } else {
m.aquireInhibitLock() m.aquireInhibitLock()

View File

@ -224,7 +224,7 @@ func TestManager(t *testing.T) {
} }
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, true)() defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, true)()
manager := NewManager(activePodsFunc, killPodsFunc, tc.shutdownGracePeriodRequested, tc.shutdownGracePeriodCriticalPods) manager, _ := NewManager(activePodsFunc, killPodsFunc, func() {}, tc.shutdownGracePeriodRequested, tc.shutdownGracePeriodCriticalPods)
manager.clock = clock.NewFakeClock(time.Now()) manager.clock = clock.NewFakeClock(time.Now())
err := manager.Start() err := manager.Start()
@ -236,6 +236,7 @@ func TestManager(t *testing.T) {
assert.NoError(t, err, "expected manager.Start() to not return error") assert.NoError(t, err, "expected manager.Start() to not return error")
assert.True(t, fakeDbus.didInhibitShutdown, "expected that manager inhibited shutdown") assert.True(t, fakeDbus.didInhibitShutdown, "expected that manager inhibited shutdown")
assert.NoError(t, manager.ShutdownStatus(), "expected that manager does not return error since shutdown is not active") assert.NoError(t, manager.ShutdownStatus(), "expected that manager does not return error since shutdown is not active")
assert.Equal(t, manager.Admit(nil).Admit, true)
// Send fake shutdown event // Send fake shutdown event
fakeShutdownChan <- true fakeShutdownChan <- true
@ -253,6 +254,7 @@ func TestManager(t *testing.T) {
} }
assert.Error(t, manager.ShutdownStatus(), "expected that manager returns error since shutdown is active") assert.Error(t, manager.ShutdownStatus(), "expected that manager returns error since shutdown is active")
assert.Equal(t, manager.Admit(nil).Admit, false)
assert.Equal(t, tc.expectedPodToGracePeriodOverride, killedPodsToGracePeriods) assert.Equal(t, tc.expectedPodToGracePeriodOverride, killedPodsToGracePeriods)
assert.Equal(t, tc.expectedDidOverrideInhibitDelay, fakeDbus.didOverrideInhibitDelay, "override system inhibit delay differs") assert.Equal(t, tc.expectedDidOverrideInhibitDelay, fakeDbus.didOverrideInhibitDelay, "override system inhibit delay differs")
} }

View File

@ -22,14 +22,21 @@ import (
"time" "time"
"k8s.io/kubernetes/pkg/kubelet/eviction" "k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
) )
// Manager is a fake node shutdown manager for non linux platforms. // Manager is a fake node shutdown manager for non linux platforms.
type Manager struct{} type Manager struct{}
// NewManager returns a fake node shutdown manager for non linux platforms. // NewManager returns a fake node shutdown manager for non linux platforms.
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) *Manager { func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) {
return &Manager{} m := &Manager{}
return m, m
}
// Admit is a fake pod admission handler that always admits the pod.
func (m *Manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
return lifecycle.PodAdmitResult{Admit: true}
} }
// Start is a no-op always returning nil for non linux platforms. // Start is a no-op always returning nil for non linux platforms.