mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-26 12:46:06 +00:00
Implement shutdown manager in kubelet
Implements KEP 2000, Graceful Node Shutdown: https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2000-graceful-node-shutdown * Add new FeatureGate `GracefulNodeShutdown` to control enabling/disabling the feature * Add two new KubeletConfiguration options * `ShutdownGracePeriod` and `ShutdownGracePeriodCriticalPods` * Add new package `nodeshutdown` that implements the Node shutdown manager * The node shutdown manager uses the systemd inhibit package to create a system inhibitor, monitor for node shutdown events, and gracefully terminate pods upon a node shutdown.
This commit is contained in:
@@ -234,5 +234,7 @@ var (
|
||||
"TypeMeta.Kind",
|
||||
"VolumeStatsAggPeriod.Duration",
|
||||
"VolumePluginDir",
|
||||
"ShutdownGracePeriod.Duration",
|
||||
"ShutdownGracePeriodCriticalPods.Duration",
|
||||
)
|
||||
)
|
||||
|
@@ -67,6 +67,8 @@ registryPullQPS: 5
|
||||
resolvConf: /etc/resolv.conf
|
||||
runtimeRequestTimeout: 2m0s
|
||||
serializeImagePulls: true
|
||||
shutdownGracePeriod: 0s
|
||||
shutdownGracePeriodCriticalPods: 0s
|
||||
streamingConnectionIdleTimeout: 4h0m0s
|
||||
syncFrequency: 1m0s
|
||||
topologyManagerPolicy: none
|
||||
|
@@ -67,6 +67,8 @@ registryPullQPS: 5
|
||||
resolvConf: /etc/resolv.conf
|
||||
runtimeRequestTimeout: 2m0s
|
||||
serializeImagePulls: true
|
||||
shutdownGracePeriod: 0s
|
||||
shutdownGracePeriodCriticalPods: 0s
|
||||
streamingConnectionIdleTimeout: 4h0m0s
|
||||
syncFrequency: 1m0s
|
||||
topologyManagerPolicy: none
|
||||
|
@@ -375,6 +375,13 @@ type KubeletConfiguration struct {
|
||||
Logging componentbaseconfig.LoggingConfiguration
|
||||
// EnableSystemLogHandler enables /logs handler.
|
||||
EnableSystemLogHandler bool
|
||||
// ShutdownGracePeriod specifies the total duration by which the node should delay the shutdown; it is also the total grace period for pod termination during a node shutdown.
|
||||
// Defaults to 0 seconds, requires GracefulNodeShutdown feature gate to be enabled.
|
||||
ShutdownGracePeriod metav1.Duration
|
||||
// ShutdownGracePeriodCriticalPods specifies the duration used to terminate critical pods during a node shutdown. This should be less than ShutdownGracePeriod.
|
||||
// Defaults to 0 seconds, requires GracefulNodeShutdown feature gate to be enabled.
|
||||
// For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods.
|
||||
ShutdownGracePeriodCriticalPods metav1.Duration
|
||||
}
|
||||
|
||||
// KubeletAuthorizationMode denotes the authorization mode for the kubelet
|
||||
|
@@ -350,6 +350,8 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in
|
||||
if err := v1.Convert_Pointer_bool_To_bool(&in.EnableSystemLogHandler, &out.EnableSystemLogHandler, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.ShutdownGracePeriod = in.ShutdownGracePeriod
|
||||
out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -501,6 +503,8 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in
|
||||
if err := v1.Convert_bool_To_Pointer_bool(&in.EnableSystemLogHandler, &out.EnableSystemLogHandler, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.ShutdownGracePeriod = in.ShutdownGracePeriod
|
||||
out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@@ -140,6 +140,21 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
|
||||
allErrors = append(allErrors, fmt.Errorf("invalid configuration: topologyManagerScope non-allowable value: %v", kc.TopologyManagerScope))
|
||||
}
|
||||
|
||||
if localFeatureGate.Enabled(features.GracefulNodeShutdown) {
|
||||
if kc.ShutdownGracePeriod.Duration < 0 || kc.ShutdownGracePeriodCriticalPods.Duration < 0 || kc.ShutdownGracePeriodCriticalPods.Duration > kc.ShutdownGracePeriod.Duration {
|
||||
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ShutdownGracePeriod %v must be >= 0, ShutdownGracePeriodCriticalPods %v must be >= 0, and ShutdownGracePeriodCriticalPods %v must be <= ShutdownGracePeriod %v", kc.ShutdownGracePeriod, kc.ShutdownGracePeriodCriticalPods, kc.ShutdownGracePeriodCriticalPods, kc.ShutdownGracePeriod))
|
||||
}
|
||||
if kc.ShutdownGracePeriod.Duration > 0 && kc.ShutdownGracePeriod.Duration < time.Duration(time.Second) {
|
||||
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ShutdownGracePeriod %v must be either zero or otherwise >= 1 sec", kc.ShutdownGracePeriod))
|
||||
}
|
||||
if kc.ShutdownGracePeriodCriticalPods.Duration > 0 && kc.ShutdownGracePeriodCriticalPods.Duration < time.Duration(time.Second) {
|
||||
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ShutdownGracePeriodCriticalPods %v must be either zero or otherwise >= 1 sec", kc.ShutdownGracePeriodCriticalPods))
|
||||
}
|
||||
}
|
||||
if (kc.ShutdownGracePeriod.Duration > 0 || kc.ShutdownGracePeriodCriticalPods.Duration > 0) && !localFeatureGate.Enabled(features.GracefulNodeShutdown) {
|
||||
allErrors = append(allErrors, fmt.Errorf("invalid configuration: Specifying ShutdownGracePeriod or ShutdownGracePeriodCriticalPods requires feature gate GracefulNodeShutdown"))
|
||||
}
|
||||
|
||||
for _, val := range kc.EnforceNodeAllocatable {
|
||||
switch val {
|
||||
case kubetypes.NodeAllocatableEnforcementKey:
|
||||
|
@@ -27,36 +27,39 @@ import (
|
||||
|
||||
func TestValidateKubeletConfiguration(t *testing.T) {
|
||||
successCase1 := &kubeletconfig.KubeletConfiguration{
|
||||
CgroupsPerQOS: true,
|
||||
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
|
||||
SystemReservedCgroup: "/system.slice",
|
||||
KubeReservedCgroup: "/kubelet.service",
|
||||
SystemCgroups: "",
|
||||
CgroupRoot: "",
|
||||
EventBurst: 10,
|
||||
EventRecordQPS: 5,
|
||||
HealthzPort: 10248,
|
||||
ImageGCHighThresholdPercent: 85,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
IPTablesDropBit: 15,
|
||||
IPTablesMasqueradeBit: 14,
|
||||
KubeAPIBurst: 10,
|
||||
KubeAPIQPS: 5,
|
||||
MaxOpenFiles: 1000000,
|
||||
MaxPods: 110,
|
||||
OOMScoreAdj: -999,
|
||||
PodsPerCore: 100,
|
||||
Port: 65535,
|
||||
ReadOnlyPort: 0,
|
||||
RegistryBurst: 10,
|
||||
RegistryPullQPS: 5,
|
||||
HairpinMode: kubeletconfig.PromiscuousBridge,
|
||||
NodeLeaseDurationSeconds: 1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 25 * time.Millisecond},
|
||||
TopologyManagerScope: kubeletconfig.PodTopologyManagerScope,
|
||||
TopologyManagerPolicy: kubeletconfig.SingleNumaNodeTopologyManagerPolicy,
|
||||
CgroupsPerQOS: true,
|
||||
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
|
||||
SystemReservedCgroup: "/system.slice",
|
||||
KubeReservedCgroup: "/kubelet.service",
|
||||
SystemCgroups: "",
|
||||
CgroupRoot: "",
|
||||
EventBurst: 10,
|
||||
EventRecordQPS: 5,
|
||||
HealthzPort: 10248,
|
||||
ImageGCHighThresholdPercent: 85,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
IPTablesDropBit: 15,
|
||||
IPTablesMasqueradeBit: 14,
|
||||
KubeAPIBurst: 10,
|
||||
KubeAPIQPS: 5,
|
||||
MaxOpenFiles: 1000000,
|
||||
MaxPods: 110,
|
||||
OOMScoreAdj: -999,
|
||||
PodsPerCore: 100,
|
||||
Port: 65535,
|
||||
ReadOnlyPort: 0,
|
||||
RegistryBurst: 10,
|
||||
RegistryPullQPS: 5,
|
||||
HairpinMode: kubeletconfig.PromiscuousBridge,
|
||||
NodeLeaseDurationSeconds: 1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 25 * time.Millisecond},
|
||||
TopologyManagerScope: kubeletconfig.PodTopologyManagerScope,
|
||||
TopologyManagerPolicy: kubeletconfig.SingleNumaNodeTopologyManagerPolicy,
|
||||
ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second},
|
||||
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
|
||||
FeatureGates: map[string]bool{
|
||||
"CustomCPUCFSQuotaPeriod": true,
|
||||
"GracefulNodeShutdown": true,
|
||||
},
|
||||
}
|
||||
if allErrors := ValidateKubeletConfiguration(successCase1); allErrors != nil {
|
||||
@@ -64,37 +67,40 @@ func TestValidateKubeletConfiguration(t *testing.T) {
|
||||
}
|
||||
|
||||
successCase2 := &kubeletconfig.KubeletConfiguration{
|
||||
CgroupsPerQOS: true,
|
||||
EnforceNodeAllocatable: []string{"pods"},
|
||||
SystemReservedCgroup: "",
|
||||
KubeReservedCgroup: "",
|
||||
SystemCgroups: "",
|
||||
CgroupRoot: "",
|
||||
EventBurst: 10,
|
||||
EventRecordQPS: 5,
|
||||
HealthzPort: 10248,
|
||||
ImageGCHighThresholdPercent: 85,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
IPTablesDropBit: 15,
|
||||
IPTablesMasqueradeBit: 14,
|
||||
KubeAPIBurst: 10,
|
||||
KubeAPIQPS: 5,
|
||||
MaxOpenFiles: 1000000,
|
||||
MaxPods: 110,
|
||||
OOMScoreAdj: -999,
|
||||
PodsPerCore: 100,
|
||||
Port: 65535,
|
||||
ReadOnlyPort: 0,
|
||||
RegistryBurst: 10,
|
||||
RegistryPullQPS: 5,
|
||||
HairpinMode: kubeletconfig.PromiscuousBridge,
|
||||
NodeLeaseDurationSeconds: 1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
|
||||
ReservedSystemCPUs: "0-3",
|
||||
TopologyManagerScope: kubeletconfig.ContainerTopologyManagerScope,
|
||||
TopologyManagerPolicy: kubeletconfig.NoneTopologyManagerPolicy,
|
||||
CgroupsPerQOS: true,
|
||||
EnforceNodeAllocatable: []string{"pods"},
|
||||
SystemReservedCgroup: "",
|
||||
KubeReservedCgroup: "",
|
||||
SystemCgroups: "",
|
||||
CgroupRoot: "",
|
||||
EventBurst: 10,
|
||||
EventRecordQPS: 5,
|
||||
HealthzPort: 10248,
|
||||
ImageGCHighThresholdPercent: 85,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
IPTablesDropBit: 15,
|
||||
IPTablesMasqueradeBit: 14,
|
||||
KubeAPIBurst: 10,
|
||||
KubeAPIQPS: 5,
|
||||
MaxOpenFiles: 1000000,
|
||||
MaxPods: 110,
|
||||
OOMScoreAdj: -999,
|
||||
PodsPerCore: 100,
|
||||
Port: 65535,
|
||||
ReadOnlyPort: 0,
|
||||
RegistryBurst: 10,
|
||||
RegistryPullQPS: 5,
|
||||
HairpinMode: kubeletconfig.PromiscuousBridge,
|
||||
NodeLeaseDurationSeconds: 1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
|
||||
ReservedSystemCPUs: "0-3",
|
||||
TopologyManagerScope: kubeletconfig.ContainerTopologyManagerScope,
|
||||
TopologyManagerPolicy: kubeletconfig.NoneTopologyManagerPolicy,
|
||||
ShutdownGracePeriod: metav1.Duration{Duration: 10 * time.Minute},
|
||||
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0},
|
||||
FeatureGates: map[string]bool{
|
||||
"CustomCPUCFSQuotaPeriod": true,
|
||||
"GracefulNodeShutdown": true,
|
||||
},
|
||||
}
|
||||
if allErrors := ValidateKubeletConfiguration(successCase2); allErrors != nil {
|
||||
@@ -102,68 +108,73 @@ func TestValidateKubeletConfiguration(t *testing.T) {
|
||||
}
|
||||
|
||||
errorCase1 := &kubeletconfig.KubeletConfiguration{
|
||||
CgroupsPerQOS: false,
|
||||
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved", "illegal-key"},
|
||||
SystemCgroups: "/",
|
||||
CgroupRoot: "",
|
||||
EventBurst: -10,
|
||||
EventRecordQPS: -10,
|
||||
HealthzPort: -10,
|
||||
ImageGCHighThresholdPercent: 101,
|
||||
ImageGCLowThresholdPercent: 101,
|
||||
IPTablesDropBit: -10,
|
||||
IPTablesMasqueradeBit: -10,
|
||||
KubeAPIBurst: -10,
|
||||
KubeAPIQPS: -10,
|
||||
MaxOpenFiles: -10,
|
||||
MaxPods: -10,
|
||||
OOMScoreAdj: -1001,
|
||||
PodsPerCore: -10,
|
||||
Port: 0,
|
||||
ReadOnlyPort: -10,
|
||||
RegistryBurst: -10,
|
||||
RegistryPullQPS: -10,
|
||||
HairpinMode: "foo",
|
||||
NodeLeaseDurationSeconds: -1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond},
|
||||
CgroupsPerQOS: false,
|
||||
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved", "illegal-key"},
|
||||
SystemCgroups: "/",
|
||||
CgroupRoot: "",
|
||||
EventBurst: -10,
|
||||
EventRecordQPS: -10,
|
||||
HealthzPort: -10,
|
||||
ImageGCHighThresholdPercent: 101,
|
||||
ImageGCLowThresholdPercent: 101,
|
||||
IPTablesDropBit: -10,
|
||||
IPTablesMasqueradeBit: -10,
|
||||
KubeAPIBurst: -10,
|
||||
KubeAPIQPS: -10,
|
||||
MaxOpenFiles: -10,
|
||||
MaxPods: -10,
|
||||
OOMScoreAdj: -1001,
|
||||
PodsPerCore: -10,
|
||||
Port: 0,
|
||||
ReadOnlyPort: -10,
|
||||
RegistryBurst: -10,
|
||||
RegistryPullQPS: -10,
|
||||
HairpinMode: "foo",
|
||||
NodeLeaseDurationSeconds: -1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond},
|
||||
ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second},
|
||||
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
|
||||
}
|
||||
const numErrsErrorCase1 = 27
|
||||
const numErrsErrorCase1 = 28
|
||||
if allErrors := ValidateKubeletConfiguration(errorCase1); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase1 {
|
||||
t.Errorf("expect %d errors, got %v", numErrsErrorCase1, len(allErrors.(utilerrors.Aggregate).Errors()))
|
||||
}
|
||||
|
||||
errorCase2 := &kubeletconfig.KubeletConfiguration{
|
||||
CgroupsPerQOS: true,
|
||||
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
|
||||
SystemReservedCgroup: "/system.slice",
|
||||
KubeReservedCgroup: "/kubelet.service",
|
||||
SystemCgroups: "",
|
||||
CgroupRoot: "",
|
||||
EventBurst: 10,
|
||||
EventRecordQPS: 5,
|
||||
HealthzPort: 10248,
|
||||
ImageGCHighThresholdPercent: 85,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
IPTablesDropBit: 15,
|
||||
IPTablesMasqueradeBit: 14,
|
||||
KubeAPIBurst: 10,
|
||||
KubeAPIQPS: 5,
|
||||
MaxOpenFiles: 1000000,
|
||||
MaxPods: 110,
|
||||
OOMScoreAdj: -999,
|
||||
PodsPerCore: 100,
|
||||
Port: 65535,
|
||||
ReadOnlyPort: 0,
|
||||
RegistryBurst: 10,
|
||||
RegistryPullQPS: 5,
|
||||
HairpinMode: kubeletconfig.PromiscuousBridge,
|
||||
NodeLeaseDurationSeconds: 1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
|
||||
ReservedSystemCPUs: "0-3",
|
||||
TopologyManagerScope: "invalid",
|
||||
TopologyManagerPolicy: "invalid",
|
||||
CgroupsPerQOS: true,
|
||||
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
|
||||
SystemReservedCgroup: "/system.slice",
|
||||
KubeReservedCgroup: "/kubelet.service",
|
||||
SystemCgroups: "",
|
||||
CgroupRoot: "",
|
||||
EventBurst: 10,
|
||||
EventRecordQPS: 5,
|
||||
HealthzPort: 10248,
|
||||
ImageGCHighThresholdPercent: 85,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
IPTablesDropBit: 15,
|
||||
IPTablesMasqueradeBit: 14,
|
||||
KubeAPIBurst: 10,
|
||||
KubeAPIQPS: 5,
|
||||
MaxOpenFiles: 1000000,
|
||||
MaxPods: 110,
|
||||
OOMScoreAdj: -999,
|
||||
PodsPerCore: 100,
|
||||
Port: 65535,
|
||||
ReadOnlyPort: 0,
|
||||
RegistryBurst: 10,
|
||||
RegistryPullQPS: 5,
|
||||
HairpinMode: kubeletconfig.PromiscuousBridge,
|
||||
NodeLeaseDurationSeconds: 1,
|
||||
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
|
||||
ReservedSystemCPUs: "0-3",
|
||||
TopologyManagerScope: "invalid",
|
||||
TopologyManagerPolicy: "invalid",
|
||||
ShutdownGracePeriod: metav1.Duration{Duration: 40 * time.Second},
|
||||
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
|
||||
FeatureGates: map[string]bool{
|
||||
"CustomCPUCFSQuotaPeriod": true,
|
||||
"GracefulNodeShutdown": true,
|
||||
},
|
||||
}
|
||||
const numErrsErrorCase2 = 3
|
||||
|
2
pkg/kubelet/apis/config/zz_generated.deepcopy.go
generated
2
pkg/kubelet/apis/config/zz_generated.deepcopy.go
generated
@@ -271,6 +271,8 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
|
||||
copy(*out, *in)
|
||||
}
|
||||
out.Logging = in.Logging
|
||||
out.ShutdownGracePeriod = in.ShutdownGracePeriod
|
||||
out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
|
||||
return
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user