Merge pull request #133138 from linxiulei/fix_shutdown

kubelet: poll after reloading logind conf
This commit is contained in:
Kubernetes Prow Robot
2025-09-03 10:05:13 -07:00
committed by GitHub
2 changed files with 32 additions and 8 deletions

View File

@@ -27,6 +27,7 @@ import (
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
@@ -191,15 +192,38 @@ func (m *managerImpl) start() (chan struct{}, error) {
return nil, err
}
// Read the current inhibitDelay again, if the override was successful, currentInhibitDelay will be equal to shutdownGracePeriodRequested.
updatedInhibitDelay, err := m.dbusCon.CurrentInhibitDelay()
// The ReloadLogindConf call is asynchronous. Poll with exponential backoff until the configuration is updated.
backoff := wait.Backoff{
Duration: 100 * time.Millisecond,
Factor: 2.0,
Steps: 5,
}
var updatedInhibitDelay time.Duration
attempt := 0
err = wait.ExponentialBackoff(backoff, func() (bool, error) {
attempt += 1
// Read the inhibit delay again; if the override was successful, updatedInhibitDelay will be at least periodRequested.
updatedInhibitDelay, err = m.dbusCon.CurrentInhibitDelay()
if err != nil {
return false, err
}
if periodRequested <= updatedInhibitDelay {
return true, nil
}
if attempt < backoff.Steps {
m.logger.V(3).Info("InhibitDelayMaxSec still less than requested, retrying", "attempt", attempt, "current", updatedInhibitDelay, "requested", periodRequested)
}
return false, nil
})
if err != nil {
return nil, err
if !wait.Interrupted(err) {
return nil, err
}
if periodRequested > updatedInhibitDelay {
return nil, fmt.Errorf("node shutdown manager was timed out after %d attempts waiting for logind InhibitDelayMaxSec to update to %v (ShutdownGracePeriod), current value is %v", attempt, periodRequested, updatedInhibitDelay)
}
}
if periodRequested > updatedInhibitDelay {
return nil, fmt.Errorf("node shutdown manager was unable to update logind InhibitDelayMaxSec to %v (ShutdownGracePeriod), current value of InhibitDelayMaxSec (%v) is less than requested ShutdownGracePeriod", periodRequested, updatedInhibitDelay)
}
}
err = m.acquireInhibitLock()

View File

@@ -278,7 +278,7 @@ func TestManager(t *testing.T) {
overrideSystemInhibitDelay: time.Duration(5 * time.Second),
expectedDidOverrideInhibitDelay: true,
expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 5, "critical-pod-nil-grace-period": 0},
expectedError: fmt.Errorf("unable to update logind InhibitDelayMaxSec to 30s (ShutdownGracePeriod), current value of InhibitDelayMaxSec (5s) is less than requested ShutdownGracePeriod"),
expectedError: fmt.Errorf("node shutdown manager was timed out after 5 attempts waiting for logind InhibitDelayMaxSec to update to 30s (ShutdownGracePeriod), current value is 5s"),
},
{
desc: "override unsuccessful, zero time",
@@ -287,7 +287,7 @@ func TestManager(t *testing.T) {
shutdownGracePeriodCriticalPods: time.Duration(5 * time.Second),
systemInhibitDelay: time.Duration(0 * time.Second),
overrideSystemInhibitDelay: time.Duration(0 * time.Second),
expectedError: fmt.Errorf("unable to update logind InhibitDelayMaxSec to 5s (ShutdownGracePeriod), current value of InhibitDelayMaxSec (0s) is less than requested ShutdownGracePeriod"),
expectedError: fmt.Errorf("node shutdown manager was timed out after 5 attempts waiting for logind InhibitDelayMaxSec to update to 5s (ShutdownGracePeriod), current value is 0s"),
},
{
desc: "no override, all time to critical pods",