Implement shutdown manager in kubelet

Implements KEP 2000, Graceful Node Shutdown:
https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2000-graceful-node-shutdown

* Add new FeatureGate `GracefulNodeShutdown` to control
enabling/disabling the feature
* Add two new KubeletConfiguration options
  * `ShutdownGracePeriod` and `ShutdownGracePeriodCriticalPods`
* Add new package, `nodeshutdown` that implements the Node shutdown
manager
  * The node shutdown manager uses the systemd inhibit package, to
  create an system inhibitor, monitor for node shutdown events, and
  gracefully terminate pods upon a node shutdown.
This commit is contained in:
David Porter
2020-11-02 23:18:36 +00:00
parent 2343689ce7
commit 16f71c6d47
20 changed files with 896 additions and 124 deletions

View File

@@ -27,36 +27,39 @@ import (
func TestValidateKubeletConfiguration(t *testing.T) {
successCase1 := &kubeletconfig.KubeletConfiguration{
CgroupsPerQOS: true,
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
SystemReservedCgroup: "/system.slice",
KubeReservedCgroup: "/kubelet.service",
SystemCgroups: "",
CgroupRoot: "",
EventBurst: 10,
EventRecordQPS: 5,
HealthzPort: 10248,
ImageGCHighThresholdPercent: 85,
ImageGCLowThresholdPercent: 80,
IPTablesDropBit: 15,
IPTablesMasqueradeBit: 14,
KubeAPIBurst: 10,
KubeAPIQPS: 5,
MaxOpenFiles: 1000000,
MaxPods: 110,
OOMScoreAdj: -999,
PodsPerCore: 100,
Port: 65535,
ReadOnlyPort: 0,
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
NodeLeaseDurationSeconds: 1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 25 * time.Millisecond},
TopologyManagerScope: kubeletconfig.PodTopologyManagerScope,
TopologyManagerPolicy: kubeletconfig.SingleNumaNodeTopologyManagerPolicy,
CgroupsPerQOS: true,
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
SystemReservedCgroup: "/system.slice",
KubeReservedCgroup: "/kubelet.service",
SystemCgroups: "",
CgroupRoot: "",
EventBurst: 10,
EventRecordQPS: 5,
HealthzPort: 10248,
ImageGCHighThresholdPercent: 85,
ImageGCLowThresholdPercent: 80,
IPTablesDropBit: 15,
IPTablesMasqueradeBit: 14,
KubeAPIBurst: 10,
KubeAPIQPS: 5,
MaxOpenFiles: 1000000,
MaxPods: 110,
OOMScoreAdj: -999,
PodsPerCore: 100,
Port: 65535,
ReadOnlyPort: 0,
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
NodeLeaseDurationSeconds: 1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 25 * time.Millisecond},
TopologyManagerScope: kubeletconfig.PodTopologyManagerScope,
TopologyManagerPolicy: kubeletconfig.SingleNumaNodeTopologyManagerPolicy,
ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
FeatureGates: map[string]bool{
"CustomCPUCFSQuotaPeriod": true,
"GracefulNodeShutdown": true,
},
}
if allErrors := ValidateKubeletConfiguration(successCase1); allErrors != nil {
@@ -64,37 +67,40 @@ func TestValidateKubeletConfiguration(t *testing.T) {
}
successCase2 := &kubeletconfig.KubeletConfiguration{
CgroupsPerQOS: true,
EnforceNodeAllocatable: []string{"pods"},
SystemReservedCgroup: "",
KubeReservedCgroup: "",
SystemCgroups: "",
CgroupRoot: "",
EventBurst: 10,
EventRecordQPS: 5,
HealthzPort: 10248,
ImageGCHighThresholdPercent: 85,
ImageGCLowThresholdPercent: 80,
IPTablesDropBit: 15,
IPTablesMasqueradeBit: 14,
KubeAPIBurst: 10,
KubeAPIQPS: 5,
MaxOpenFiles: 1000000,
MaxPods: 110,
OOMScoreAdj: -999,
PodsPerCore: 100,
Port: 65535,
ReadOnlyPort: 0,
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
NodeLeaseDurationSeconds: 1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
ReservedSystemCPUs: "0-3",
TopologyManagerScope: kubeletconfig.ContainerTopologyManagerScope,
TopologyManagerPolicy: kubeletconfig.NoneTopologyManagerPolicy,
CgroupsPerQOS: true,
EnforceNodeAllocatable: []string{"pods"},
SystemReservedCgroup: "",
KubeReservedCgroup: "",
SystemCgroups: "",
CgroupRoot: "",
EventBurst: 10,
EventRecordQPS: 5,
HealthzPort: 10248,
ImageGCHighThresholdPercent: 85,
ImageGCLowThresholdPercent: 80,
IPTablesDropBit: 15,
IPTablesMasqueradeBit: 14,
KubeAPIBurst: 10,
KubeAPIQPS: 5,
MaxOpenFiles: 1000000,
MaxPods: 110,
OOMScoreAdj: -999,
PodsPerCore: 100,
Port: 65535,
ReadOnlyPort: 0,
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
NodeLeaseDurationSeconds: 1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
ReservedSystemCPUs: "0-3",
TopologyManagerScope: kubeletconfig.ContainerTopologyManagerScope,
TopologyManagerPolicy: kubeletconfig.NoneTopologyManagerPolicy,
ShutdownGracePeriod: metav1.Duration{Duration: 10 * time.Minute},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0},
FeatureGates: map[string]bool{
"CustomCPUCFSQuotaPeriod": true,
"GracefulNodeShutdown": true,
},
}
if allErrors := ValidateKubeletConfiguration(successCase2); allErrors != nil {
@@ -102,68 +108,73 @@ func TestValidateKubeletConfiguration(t *testing.T) {
}
errorCase1 := &kubeletconfig.KubeletConfiguration{
CgroupsPerQOS: false,
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved", "illegal-key"},
SystemCgroups: "/",
CgroupRoot: "",
EventBurst: -10,
EventRecordQPS: -10,
HealthzPort: -10,
ImageGCHighThresholdPercent: 101,
ImageGCLowThresholdPercent: 101,
IPTablesDropBit: -10,
IPTablesMasqueradeBit: -10,
KubeAPIBurst: -10,
KubeAPIQPS: -10,
MaxOpenFiles: -10,
MaxPods: -10,
OOMScoreAdj: -1001,
PodsPerCore: -10,
Port: 0,
ReadOnlyPort: -10,
RegistryBurst: -10,
RegistryPullQPS: -10,
HairpinMode: "foo",
NodeLeaseDurationSeconds: -1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond},
CgroupsPerQOS: false,
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved", "illegal-key"},
SystemCgroups: "/",
CgroupRoot: "",
EventBurst: -10,
EventRecordQPS: -10,
HealthzPort: -10,
ImageGCHighThresholdPercent: 101,
ImageGCLowThresholdPercent: 101,
IPTablesDropBit: -10,
IPTablesMasqueradeBit: -10,
KubeAPIBurst: -10,
KubeAPIQPS: -10,
MaxOpenFiles: -10,
MaxPods: -10,
OOMScoreAdj: -1001,
PodsPerCore: -10,
Port: 0,
ReadOnlyPort: -10,
RegistryBurst: -10,
RegistryPullQPS: -10,
HairpinMode: "foo",
NodeLeaseDurationSeconds: -1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond},
ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
}
const numErrsErrorCase1 = 27
const numErrsErrorCase1 = 28
if allErrors := ValidateKubeletConfiguration(errorCase1); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase1 {
t.Errorf("expect %d errors, got %v", numErrsErrorCase1, len(allErrors.(utilerrors.Aggregate).Errors()))
}
errorCase2 := &kubeletconfig.KubeletConfiguration{
CgroupsPerQOS: true,
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
SystemReservedCgroup: "/system.slice",
KubeReservedCgroup: "/kubelet.service",
SystemCgroups: "",
CgroupRoot: "",
EventBurst: 10,
EventRecordQPS: 5,
HealthzPort: 10248,
ImageGCHighThresholdPercent: 85,
ImageGCLowThresholdPercent: 80,
IPTablesDropBit: 15,
IPTablesMasqueradeBit: 14,
KubeAPIBurst: 10,
KubeAPIQPS: 5,
MaxOpenFiles: 1000000,
MaxPods: 110,
OOMScoreAdj: -999,
PodsPerCore: 100,
Port: 65535,
ReadOnlyPort: 0,
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
NodeLeaseDurationSeconds: 1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
ReservedSystemCPUs: "0-3",
TopologyManagerScope: "invalid",
TopologyManagerPolicy: "invalid",
CgroupsPerQOS: true,
EnforceNodeAllocatable: []string{"pods", "system-reserved", "kube-reserved"},
SystemReservedCgroup: "/system.slice",
KubeReservedCgroup: "/kubelet.service",
SystemCgroups: "",
CgroupRoot: "",
EventBurst: 10,
EventRecordQPS: 5,
HealthzPort: 10248,
ImageGCHighThresholdPercent: 85,
ImageGCLowThresholdPercent: 80,
IPTablesDropBit: 15,
IPTablesMasqueradeBit: 14,
KubeAPIBurst: 10,
KubeAPIQPS: 5,
MaxOpenFiles: 1000000,
MaxPods: 110,
OOMScoreAdj: -999,
PodsPerCore: 100,
Port: 65535,
ReadOnlyPort: 0,
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
NodeLeaseDurationSeconds: 1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 50 * time.Millisecond},
ReservedSystemCPUs: "0-3",
TopologyManagerScope: "invalid",
TopologyManagerPolicy: "invalid",
ShutdownGracePeriod: metav1.Duration{Duration: 40 * time.Second},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
FeatureGates: map[string]bool{
"CustomCPUCFSQuotaPeriod": true,
"GracefulNodeShutdown": true,
},
}
const numErrsErrorCase2 = 3