From 94e494d9d7341640b3c41ce1eb5f806948e25f4d Mon Sep 17 00:00:00 2001 From: Shiming Zhang Date: Sun, 6 Feb 2022 01:06:58 +0800 Subject: [PATCH 1/6] Promote kubelet graceful node shutdown based on pod priority to beta --- pkg/features/kube_features.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index c8ef943b70e..11c85985147 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -554,7 +554,7 @@ const ( // owner: @wzshiming // alpha: v1.23 - // + // beta: v1.24 // Make the kubelet use shutdown configuration based on pod priority values for graceful shutdown. GracefulNodeShutdownBasedOnPodPriority featuregate.Feature = "GracefulNodeShutdownBasedOnPodPriority" @@ -896,7 +896,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS ExecProbeTimeout: {Default: true, PreRelease: featuregate.GA}, // lock to default and remove after v1.22 based on KEP #1972 update KubeletCredentialProviders: {Default: false, PreRelease: featuregate.Alpha}, GracefulNodeShutdown: {Default: true, PreRelease: featuregate.Beta}, - GracefulNodeShutdownBasedOnPodPriority: {Default: false, PreRelease: featuregate.Alpha}, + GracefulNodeShutdownBasedOnPodPriority: {Default: true, PreRelease: featuregate.Beta}, ServiceLBNodePortControl: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.26 MixedProtocolLBService: {Default: false, PreRelease: featuregate.Alpha}, VolumeCapacityPriority: {Default: false, PreRelease: featuregate.Alpha}, From 5eb3e88f6bf156cbcf4839d0ed138f5e89574d04 Mon Sep 17 00:00:00 2001 From: Shiming Zhang Date: Fri, 11 Mar 2022 17:30:37 +0800 Subject: [PATCH 2/6] Support metrics for node shutdown --- pkg/kubelet/kubelet.go | 1 + pkg/kubelet/metrics/metrics.go | 27 ++++++++ .../nodeshutdown/nodeshutdown_manager.go | 1 + .../nodeshutdown_manager_linux.go | 59 ++++++++++++++++- pkg/kubelet/nodeshutdown/storage.go | 
63 +++++++++++++++++++ 5 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 pkg/kubelet/nodeshutdown/storage.go diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 4d71e685369..dd86013edb7 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -832,6 +832,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, ShutdownGracePeriodRequested: kubeCfg.ShutdownGracePeriod.Duration, ShutdownGracePeriodCriticalPods: kubeCfg.ShutdownGracePeriodCriticalPods.Duration, ShutdownGracePeriodByPodPriority: kubeCfg.ShutdownGracePeriodByPodPriority, + StateDirectory: rootDirectory, }) klet.shutdownManager = shutdownManager klet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 2051ffed448..cbfdead20d3 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -462,6 +462,26 @@ var ( StabilityLevel: metrics.ALPHA, }, ) + + // GracefulShutdownStartTime is a gauge that records the time at which the kubelet started graceful shutdown. + GracefulShutdownStartTime = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: "graceful_shutdown_start_time_seconds", + Help: "Last graceful shutdown start time since unix epoch in seconds", + StabilityLevel: metrics.ALPHA, + }, + ) + + // GracefulShutdownEndTime is a gauge that records the time at which the kubelet completed graceful shutdown. 
+ GracefulShutdownEndTime = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: "graceful_shutdown_end_time_seconds", + Help: "Last graceful shutdown end time since unix epoch in seconds", + StabilityLevel: metrics.ALPHA, + }, + ) ) var registerMetrics sync.Once @@ -504,6 +524,13 @@ func Register(collectors ...metrics.StableCollector) { for _, collector := range collectors { legacyregistry.CustomMustRegister(collector) } + + if utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) && + utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority) { + legacyregistry.MustRegister(GracefulShutdownStartTime) + legacyregistry.MustRegister(GracefulShutdownEndTime) + } + }) } diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager.go index 4e4cfbfe5e9..00406299be1 100644 --- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager.go +++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager.go @@ -46,6 +46,7 @@ type Config struct { ShutdownGracePeriodRequested time.Duration ShutdownGracePeriodCriticalPods time.Duration ShutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority + StateDirectory string Clock clock.Clock } diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go index a7d62048f57..29cdbf29fee 100644 --- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go +++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go @@ -22,6 +22,7 @@ package nodeshutdown import ( "fmt" + "path/filepath" "sort" "sync" "time" @@ -36,6 +37,7 @@ import ( kubeletevents "k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/pkg/kubelet/eviction" "k8s.io/kubernetes/pkg/kubelet/lifecycle" + "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd" "k8s.io/kubernetes/pkg/kubelet/prober" "k8s.io/utils/clock" @@ -47,6 +49,7 @@ const ( 
nodeShutdownNotAdmittedReason = "NodeShutdown" nodeShutdownNotAdmittedMessage = "Pod was rejected as the node is shutting down." dbusReconnectPeriod = 1 * time.Second + localStorageStateFile = "graceful_node_shutdown_state" ) var systemDbus = func() (dbusInhibiter, error) { @@ -81,6 +84,9 @@ type managerImpl struct { nodeShuttingDownNow bool clock clock.Clock + + enableMetrics bool + storage storage } // NewManager returns a new node shutdown manager. @@ -120,6 +126,10 @@ func NewManager(conf *Config) (Manager, lifecycle.PodAdmitHandler) { syncNodeStatus: conf.SyncNodeStatusFunc, shutdownGracePeriodByPodPriority: shutdownGracePeriodByPodPriority, clock: conf.Clock, + enableMetrics: utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority), + storage: localStorage{ + Path: filepath.Join(conf.StateDirectory, localStorageStateFile), + }, } klog.InfoS("Creating node shutdown manager", "shutdownGracePeriodRequested", conf.ShutdownGracePeriodRequested, @@ -143,6 +153,24 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd return lifecycle.PodAdmitResult{Admit: true} } +// setMetrics sets the metrics for the node shutdown manager. +func (m *managerImpl) setMetrics() { + if m.enableMetrics && m.storage != nil { + sta := state{} + err := m.storage.Load(&sta) + if err != nil { + klog.ErrorS(err, "Failed to load graceful shutdown state") + } else { + if !sta.StartTime.IsZero() { + metrics.GracefulShutdownStartTime.Set(timestamp(sta.StartTime)) + } + if !sta.EndTime.IsZero() { + metrics.GracefulShutdownEndTime.Set(timestamp(sta.EndTime)) + } + } + } +} + // Start starts the node shutdown manager and will start watching the node for shutdown events. 
func (m *managerImpl) Start() error { stop, err := m.start() @@ -163,6 +191,8 @@ func (m *managerImpl) Start() error { } } }() + + m.setMetrics() return nil } @@ -289,6 +319,32 @@ func (m *managerImpl) processShutdownEvent() error { klog.V(1).InfoS("Shutdown manager processing shutdown event") activePods := m.getPods() + defer func() { + m.dbusCon.ReleaseInhibitLock(m.inhibitLock) + klog.V(1).InfoS("Shutdown manager completed processing shutdown event, node will shutdown shortly") + }() + + if m.enableMetrics && m.storage != nil { + startTime := time.Now() + err := m.storage.Store(state{ + StartTime: startTime, + }) + if err != nil { + klog.ErrorS(err, "Failed to store graceful shutdown state") + } + + defer func() { + endTime := time.Now() + err := m.storage.Store(state{ + StartTime: startTime, + EndTime: endTime, + }) + if err != nil { + klog.ErrorS(err, "Failed to store graceful shutdown state") + } + }() + } + groups := groupByPriority(m.shutdownGracePeriodByPodPriority, activePods) for _, group := range groups { // If there are no pods in a particular range, @@ -347,9 +403,6 @@ func (m *managerImpl) processShutdownEvent() error { } } - m.dbusCon.ReleaseInhibitLock(m.inhibitLock) - klog.V(1).InfoS("Shutdown manager completed processing shutdown event, node will shutdown shortly") - return nil } diff --git a/pkg/kubelet/nodeshutdown/storage.go b/pkg/kubelet/nodeshutdown/storage.go new file mode 100644 index 00000000000..07d7de09a67 --- /dev/null +++ b/pkg/kubelet/nodeshutdown/storage.go @@ -0,0 +1,63 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeshutdown + +import ( + "encoding/json" + "os" + "time" +) + +type storage interface { + Store(data interface{}) (err error) + Load(data interface{}) (err error) +} + +type localStorage struct { + Path string +} + +func (l localStorage) Store(data interface{}) (err error) { + b, err := json.Marshal(data) + if err != nil { + return err + } + return os.WriteFile(l.Path, b, 0644) +} + +func (l localStorage) Load(data interface{}) (err error) { + b, err := os.ReadFile(l.Path) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + return json.Unmarshal(b, data) +} + +func timestamp(t time.Time) float64 { + if t.IsZero() { + return 0 + } + return float64(t.Unix()) +} + +type state struct { + StartTime time.Time `json:"startTime"` + EndTime time.Time `json:"endTime"` +} From 4aed18935e4a2281f84f4c8dd27dbc44dcf0fa1c Mon Sep 17 00:00:00 2001 From: Shiming Zhang Date: Wed, 16 Feb 2022 10:13:50 +0800 Subject: [PATCH 3/6] Add test for storage --- pkg/kubelet/nodeshutdown/storage_test.go | 69 ++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 pkg/kubelet/nodeshutdown/storage_test.go diff --git a/pkg/kubelet/nodeshutdown/storage_test.go b/pkg/kubelet/nodeshutdown/storage_test.go new file mode 100644 index 00000000000..3ad3e39fb14 --- /dev/null +++ b/pkg/kubelet/nodeshutdown/storage_test.go @@ -0,0 +1,69 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeshutdown + +import ( + "fmt" + "os" + "path/filepath" + "testing" + "time" +) + +func TestLocalStorage(t *testing.T) { + var localStorageStateFileName = "graceful_node_shutdown_state" + tempdir := os.TempDir() + path := filepath.Join(tempdir, localStorageStateFileName) + l := localStorage{ + Path: path, + } + now := time.Now() + want := state{ + StartTime: now, + EndTime: now, + } + err := l.Store(want) + if err != nil { + t.Error(err) + return + } + + got := state{} + err = l.Load(&got) + if err != nil { + t.Error(err) + return + } + + if !want.StartTime.Equal(got.StartTime) || !want.EndTime.Equal(got.EndTime) { + t.Errorf("got %+v, want %+v", got, want) + return + } + + raw, err := os.ReadFile(path) + if err != nil { + t.Error(err) + return + } + nowStr := now.Format(time.RFC3339Nano) + wantRaw := fmt.Sprintf(`{"startTime":"` + nowStr + `","endTime":"` + nowStr + `"}`) + if string(raw) != wantRaw { + t.Errorf("got %s, want %s", string(raw), wantRaw) + return + } + +} From 1322dbba463071caf316a8eca75fb5f2188c8fb3 Mon Sep 17 00:00:00 2001 From: Shiming Zhang Date: Fri, 11 Mar 2022 17:30:10 +0800 Subject: [PATCH 4/6] Add e2e --- test/e2e_node/node_shutdown_linux_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/e2e_node/node_shutdown_linux_test.go b/test/e2e_node/node_shutdown_linux_test.go index 4d3b93b98fe..ccb92ecb756 100644 --- a/test/e2e_node/node_shutdown_linux_test.go +++ b/test/e2e_node/node_shutdown_linux_test.go @@ -398,6 +398,11 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut return nil }, 
podStatusUpdateTimeout, pollInterval).Should(gomega.BeNil()) } + + ginkgo.By("should have state file") + stateFile := "/var/lib/kubelet/graceful_node_shutdown_state" + _, err = os.Stat(stateFile) + framework.ExpectNoError(err) }) }) }) From a1fadab4b09a3287cb93f0305becfdf6ab2f0985 Mon Sep 17 00:00:00 2001 From: Shiming Zhang Date: Fri, 11 Mar 2022 17:48:46 +0800 Subject: [PATCH 5/6] Atomic write status file --- pkg/kubelet/nodeshutdown/storage.go | 35 ++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/pkg/kubelet/nodeshutdown/storage.go b/pkg/kubelet/nodeshutdown/storage.go index 07d7de09a67..fbf1ea90664 100644 --- a/pkg/kubelet/nodeshutdown/storage.go +++ b/pkg/kubelet/nodeshutdown/storage.go @@ -18,7 +18,10 @@ package nodeshutdown import ( "encoding/json" + "io" + "io/ioutil" "os" + "path/filepath" "time" ) @@ -36,7 +39,7 @@ func (l localStorage) Store(data interface{}) (err error) { if err != nil { return err } - return os.WriteFile(l.Path, b, 0644) + return atomicWrite(l.Path, b, 0644) } func (l localStorage) Load(data interface{}) (err error) { @@ -61,3 +64,33 @@ type state struct { StartTime time.Time `json:"startTime"` EndTime time.Time `json:"endTime"` } + +// atomicWrite atomically writes data to a file specified by filename. 
+func atomicWrite(filename string, data []byte, perm os.FileMode) error { + f, err := ioutil.TempFile(filepath.Dir(filename), ".tmp-"+filepath.Base(filename)) + if err != nil { + return err + } + err = os.Chmod(f.Name(), perm) + if err != nil { + f.Close() + return err + } + n, err := f.Write(data) + if err != nil { + f.Close() + return err + } + if n < len(data) { + f.Close() + return io.ErrShortWrite + } + if err := f.Sync(); err != nil { + f.Close() + return err + } + if err := f.Close(); err != nil { + return err + } + return os.Rename(f.Name(), filename) +} From ced991cb0071137f3f1eaf4aab048f0f558905b5 Mon Sep 17 00:00:00 2001 From: Shiming Zhang Date: Wed, 16 Mar 2022 10:14:55 +0800 Subject: [PATCH 6/6] Emit Metrics in the shutdown process --- pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go index 29cdbf29fee..6974d82acf4 100644 --- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go +++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go @@ -332,6 +332,8 @@ func (m *managerImpl) processShutdownEvent() error { if err != nil { klog.ErrorS(err, "Failed to store graceful shutdown state") } + metrics.GracefulShutdownStartTime.Set(timestamp(startTime)) + metrics.GracefulShutdownEndTime.Set(0) defer func() { endTime := time.Now() @@ -342,6 +344,7 @@ func (m *managerImpl) processShutdownEvent() error { if err != nil { klog.ErrorS(err, "Failed to store graceful shutdown state") } + metrics.GracefulShutdownEndTime.Set(timestamp(endTime)) }() }