mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Node lifecycle controller should use a label for excluding nodes
The current mechanism for excluding "master" nodes based on node names is fragile and should be fixed by using a label exclusion similar to service load balancers. The legacy code path is preserved behind a defaulted-on gate and will be removed in the future.
This commit is contained in:
parent
0f49d892d5
commit
2888e6e923
@ -17,7 +17,6 @@ go_library(
|
||||
"//pkg/scheduler/api:go_default_library",
|
||||
"//pkg/util/metrics:go_default_library",
|
||||
"//pkg/util/node:go_default_library",
|
||||
"//pkg/util/system:go_default_library",
|
||||
"//pkg/util/taints:go_default_library",
|
||||
"//staging/src/k8s.io/api/coordination/v1beta1:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
|
@ -23,13 +23,14 @@ package nodelifecycle
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"k8s.io/klog"
|
||||
|
||||
coordv1beta1 "k8s.io/api/coordination/v1beta1"
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@ -54,11 +55,11 @@ import (
|
||||
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
|
||||
nodeutil "k8s.io/kubernetes/pkg/controller/util/node"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
|
||||
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/pkg/util/metrics"
|
||||
utilnode "k8s.io/kubernetes/pkg/util/node"
|
||||
"k8s.io/kubernetes/pkg/util/system"
|
||||
taintutils "k8s.io/kubernetes/pkg/util/taints"
|
||||
)
|
||||
|
||||
@ -715,8 +716,8 @@ func (nc *Controller) monitorNodeHealth() error {
|
||||
continue
|
||||
}
|
||||
|
||||
// We do not treat a master node as a part of the cluster for network disruption checking.
|
||||
if !system.IsMasterNode(node.Name) {
|
||||
// Some nodes may be excluded from disruption checking
|
||||
if !isNodeExcludedFromDisruptionChecks(node) {
|
||||
zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
|
||||
}
|
||||
|
||||
@ -806,6 +807,45 @@ func (nc *Controller) monitorNodeHealth() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// labelNodeDisruptionExclusion is a label on nodes that controls whether they are
|
||||
// excluded from being considered for disruption checks by the node controller.
|
||||
const labelNodeDisruptionExclusion = "node.kubernetes.io/exclude-disruption"
|
||||
|
||||
func isNodeExcludedFromDisruptionChecks(node *v1.Node) bool {
|
||||
// DEPRECATED: will be removed in 1.19
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LegacyNodeRoleBehavior) {
|
||||
if legacyIsMasterNode(node.Name) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeDisruptionExclusion) {
|
||||
if _, ok := node.Labels[labelNodeDisruptionExclusion]; ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// legacyIsMasterNode returns true if given node is a registered master according
|
||||
// to the logic historically used for this function. This code path is deprecated
|
||||
// and the node disruption exclusion label should be used in the future.
|
||||
// This code will not be allowed to update to use the node-role label, since
|
||||
// node-roles may not be used for feature enablement.
|
||||
// DEPRECATED: Will be removed in 1.19
|
||||
func legacyIsMasterNode(nodeName string) bool {
|
||||
// We are trying to capture "master(-...)?$" regexp.
|
||||
// However, using regexp.MatchString() results even in more than 35%
|
||||
// of all space allocations in ControllerManager spent in this function.
|
||||
// That's why we are trying to be a bit smarter.
|
||||
if strings.HasSuffix(nodeName, "master") {
|
||||
return true
|
||||
}
|
||||
if len(nodeName) >= 10 {
|
||||
return strings.HasSuffix(nodeName[:len(nodeName)-3], "master-")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
|
||||
// which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
|
||||
func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
|
||||
|
@ -23,7 +23,7 @@ import (
|
||||
|
||||
apps "k8s.io/api/apps/v1"
|
||||
coordv1beta1 "k8s.io/api/coordination/v1beta1"
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@ -3270,3 +3270,35 @@ func TestTryUpdateNodeHealth(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_isNodeExcludedFromDisruptionChecks(t *testing.T) {
|
||||
validNodeStatus := v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: "Test"}}}
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
enableExclusion bool
|
||||
enableLegacy bool
|
||||
input *v1.Node
|
||||
want bool
|
||||
}{
|
||||
{want: false, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{}}}},
|
||||
{want: false, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Name: "master-abc"}}},
|
||||
{want: false, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{labelNodeDisruptionExclusion: ""}}}},
|
||||
|
||||
{want: false, enableExclusion: true, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Name: "master-abc"}}},
|
||||
{want: false, enableLegacy: true, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{labelNodeDisruptionExclusion: ""}}}},
|
||||
|
||||
{want: true, enableLegacy: true, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Name: "master-abc"}}},
|
||||
{want: true, enableExclusion: true, input: &v1.Node{Status: validNodeStatus, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{labelNodeDisruptionExclusion: ""}}}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeDisruptionExclusion, tt.enableExclusion)()
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LegacyNodeRoleBehavior, tt.enableLegacy)()
|
||||
|
||||
if result := isNodeExcludedFromDisruptionChecks(tt.input); result != tt.want {
|
||||
t.Errorf("isNodeExcludedFromDisruptionChecks() = %v, want %v", result, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -184,6 +184,12 @@ const (
|
||||
// Enable nodes to exclude themselves from service load balancers
|
||||
ServiceNodeExclusion featuregate.Feature = "ServiceNodeExclusion"
|
||||
|
||||
// owner @smarterclayton
|
||||
// alpha: v1.16
|
||||
//
|
||||
// Enable nodes to exclude themselves from network disruption checks
|
||||
NodeDisruptionExclusion featuregate.Feature = "NodeDisruptionExclusion"
|
||||
|
||||
// owner: @jsafrane
|
||||
// alpha: v1.9
|
||||
//
|
||||
@ -508,6 +514,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
||||
CPUCFSQuotaPeriod: {Default: false, PreRelease: featuregate.Alpha},
|
||||
TopologyManager: {Default: false, PreRelease: featuregate.Alpha},
|
||||
ServiceNodeExclusion: {Default: false, PreRelease: featuregate.Alpha},
|
||||
NodeDisruptionExclusion: {Default: false, PreRelease: featuregate.Alpha},
|
||||
MountContainers: {Default: false, PreRelease: featuregate.Alpha},
|
||||
CSIDriverRegistry: {Default: true, PreRelease: featuregate.Beta},
|
||||
CSINodeInfo: {Default: true, PreRelease: featuregate.Beta},
|
||||
|
Loading…
Reference in New Issue
Block a user