Merge pull request #98005 from wzshiming/fix-rescheduling-to-the-shutdown-node

Sync node status during kubelet node shutdown
This commit is contained in:
Kubernetes Prow Robot 2021-01-28 17:51:53 -08:00 committed by GitHub
commit 9ec1e23e41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 60 additions and 10 deletions

View File

@ -822,7 +822,11 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
v1.NamespaceNodeLease,
util.SetNodeOwnerFunc(klet.heartbeatClient, string(klet.nodeName)))
klet.shutdownManager = nodeshutdown.NewManager(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeCfg.ShutdownGracePeriod.Duration, kubeCfg.ShutdownGracePeriodCriticalPods.Duration)
// Set up the node shutdown manager.
shutdownManager, shutdownAdmitHandler := nodeshutdown.NewManager(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.syncNodeStatus, kubeCfg.ShutdownGracePeriod.Duration, kubeCfg.ShutdownGracePeriodCriticalPods.Duration)
klet.shutdownManager = shutdownManager
klet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler)
// Finally, put the most recent version of the config on the Kubelet, so
// people can see how it was configured.

View File

@ -11,10 +11,12 @@ go_library(
deps = select({
"@io_bazel_rules_go//go/platform:aix": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:android": [
"//pkg/features:go_default_library",
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/nodeshutdown/systemd:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
@ -26,25 +28,32 @@ go_library(
],
"@io_bazel_rules_go//go/platform:darwin": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:illumos": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:ios": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:js": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux": [
"//pkg/features:go_default_library",
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/nodeshutdown/systemd:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
@ -56,21 +65,27 @@ go_library(
],
"@io_bazel_rules_go//go/platform:nacl": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:plan9": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:solaris": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"@io_bazel_rules_go//go/platform:windows": [
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
"//conditions:default": [],
}),

View File

@ -31,14 +31,16 @@ import (
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
)
// Reason and message strings used by the node shutdown manager.
// nodeShutdownReason is the Reason that Admit reports when it rejects a pod.
// NOTE(review): nodeShutdownMessage is presumably attached to pods evicted
// during shutdown; its use is not visible in this hunk, so confirm.
const (
nodeShutdownReason = "Shutdown"
nodeShutdownMessage = "Node is shutting, evicting pods"
nodeShutdownReason = "Shutdown"
nodeShutdownMessage = "Node is shutting, evicting pods"
// nodeShutdownNotAdmitMessage is the Message that Admit reports when it
// rejects a pod because a node shutdown is in progress.
nodeShutdownNotAdmitMessage = "Node is in progress of shutting down, not admitting any new pods"
)
var systemDbus = func() (dbusInhibiter, error) {
@ -63,8 +65,9 @@ type Manager struct {
shutdownGracePeriodRequested time.Duration
shutdownGracePeriodCriticalPods time.Duration
getPods eviction.ActivePodsFunc
killPod eviction.KillPodFunc
getPods eviction.ActivePodsFunc
killPod eviction.KillPodFunc
syncNodeStatus func()
dbusCon dbusInhibiter
inhibitLock systemd.InhibitLock
@ -76,14 +79,30 @@ type Manager struct {
}
// NewManager returns a new node shutdown manager together with a
// lifecycle.PodAdmitHandler. Both return values are the same *Manager: its
// Admit method rejects pods while a shutdown is in progress.
//
// getPodsFunc and killPodFunc are stored on the manager for later use.
// syncNodeStatus is the callback that Start runs in a goroutine once a
// shutdown has been detected. The two durations are stored as the requested
// shutdown grace period and the grace period for critical pods. The manager
// is created with a real clock.
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) *Manager {
return &Manager{
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) {
manager := &Manager{
getPods: getPodsFunc,
killPod: killPodFunc,
syncNodeStatus: syncNodeStatus,
shutdownGracePeriodRequested: shutdownGracePeriodRequested,
shutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods,
clock: clock.RealClock{},
}
// The manager acts as its own pod admit handler, so it is returned twice.
return manager, manager
}
// Admit implements lifecycle.PodAdmitHandler. While a node shutdown is in
// progress (ShutdownStatus returns a non-nil error) every pod is rejected
// with the shutdown reason and message; otherwise every pod is admitted.
// attrs is not inspected.
func (m *Manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
	if m.ShutdownStatus() == nil {
		// No shutdown in progress: let the pod through.
		return lifecycle.PodAdmitResult{Admit: true}
	}

	rejection := lifecycle.PodAdmitResult{
		Admit:   false,
		Reason:  nodeShutdownReason,
		Message: nodeShutdownNotAdmitMessage,
	}
	return rejection
}
// Start starts the node shutdown manager and will start watching the node for shutdown events.
@ -158,6 +177,9 @@ func (m *Manager) Start() error {
m.nodeShuttingDownMutex.Unlock()
if isShuttingDown {
// Update node status and ready condition
go m.syncNodeStatus()
m.processShutdownEvent()
} else {
m.aquireInhibitLock()

View File

@ -224,7 +224,7 @@ func TestManager(t *testing.T) {
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, true)()
manager := NewManager(activePodsFunc, killPodsFunc, tc.shutdownGracePeriodRequested, tc.shutdownGracePeriodCriticalPods)
manager, _ := NewManager(activePodsFunc, killPodsFunc, func() {}, tc.shutdownGracePeriodRequested, tc.shutdownGracePeriodCriticalPods)
manager.clock = clock.NewFakeClock(time.Now())
err := manager.Start()
@ -236,6 +236,7 @@ func TestManager(t *testing.T) {
assert.NoError(t, err, "expected manager.Start() to not return error")
assert.True(t, fakeDbus.didInhibitShutdown, "expected that manager inhibited shutdown")
assert.NoError(t, manager.ShutdownStatus(), "expected that manager does not return error since shutdown is not active")
assert.Equal(t, manager.Admit(nil).Admit, true)
// Send fake shutdown event
fakeShutdownChan <- true
@ -253,6 +254,7 @@ func TestManager(t *testing.T) {
}
assert.Error(t, manager.ShutdownStatus(), "expected that manager returns error since shutdown is active")
assert.Equal(t, manager.Admit(nil).Admit, false)
assert.Equal(t, tc.expectedPodToGracePeriodOverride, killedPodsToGracePeriods)
assert.Equal(t, tc.expectedDidOverrideInhibitDelay, fakeDbus.didOverrideInhibitDelay, "override system inhibit delay differs")
}

View File

@ -22,14 +22,21 @@ import (
"time"
"k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
// Manager is a fake node shutdown manager for non-Linux platforms. It holds
// no state.
type Manager struct{}
// NewManager returns a fake node shutdown manager for non-Linux platforms.
// All arguments are ignored. The same empty *Manager is returned both as the
// manager and as the lifecycle.PodAdmitHandler.
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) *Manager {
return &Manager{}
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) {
m := &Manager{}
return m, m
}
// Admit is the fake pod admission check for non-Linux platforms. It
// unconditionally admits the pod and never looks at attrs.
func (m *Manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
	var result lifecycle.PodAdmitResult
	result.Admit = true
	return result
}
// Start is a no-op always returning nil for non linux platforms.