diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index f2cb62444a4..ff5fed13114 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -623,6 +623,11 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) { return false, 50 * time.Millisecond } _, condition := controllerutil.GetNodeCondition(&node.Status, v1.NodeReady) + if condition == nil { + logger.Info("Failed to get NodeCondition from the node status", "node", klog.KRef("", value.Value)) + // retry in 50 millisecond + return false, 50 * time.Millisecond + } // Because we want to mimic NodeStatus.Condition["Ready"] we make "unreachable" and "not ready" taints mutually exclusive. taintToAdd := v1.Taint{} oppositeTaint := v1.Taint{} diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go index 1946ca40dc3..2d348d29dc2 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go @@ -2352,6 +2352,29 @@ func TestApplyNoExecuteTaints(t *testing.T) { }, }, }, + // NotReady Taint with NoExecute effect should not be applied to a node if the NodeCondition Type NodeReady has been set to nil in the interval between the NodeController enqueuing the node and the taint manager picking up. + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + v1.LabelTopologyRegion: "region1", + v1.LabelTopologyZone: "zone1", + v1.LabelFailureDomainBetaRegion: "region1", + v1.LabelFailureDomainBetaZone: "zone1", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), } @@ -2365,6 +2388,24 @@ func TestApplyNoExecuteTaints(t *testing.T) { }, }, } + unhealthyNodeNewStatus := v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Date(2017, 1, 1, 12, 10, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + } + overrideNodeNewStatusConditions := []v1.NodeCondition{ + { + Type: "MemoryPressure", + Status: v1.ConditionUnknown, + LastHeartbeatTime: metav1.Date(2017, 1, 1, 12, 10, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC), + }, + } originalTaint := UnreachableTaintTemplate _, ctx := ktesting.NewTestContext(t) nodeController, _ := newNodeLifecycleControllerFromClient( @@ -2429,6 +2470,44 @@ func TestApplyNoExecuteTaints(t *testing.T) { if taintutils.TaintExists(node2.Spec.Taints, NotReadyTaintTemplate) || len(node2.Spec.Taints) > 0 { t.Errorf("Found taint %v in %v, which should not be present", NotReadyTaintTemplate, node2.Spec.Taints) } + + node3, err := fakeNodeHandler.Get(ctx, "node3", metav1.GetOptions{}) + if err != nil { + t.Errorf("Can't get current node3...") + return + } + node3.Status = unhealthyNodeNewStatus + _, err = fakeNodeHandler.UpdateStatus(ctx, node3, metav1.UpdateOptions{}) + if err != nil { + t.Errorf(err.Error()) + return + } + if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { + t.Errorf("unexpected error: %v", err) + } + if err := nodeController.monitorNodeHealth(ctx); err != nil { + t.Errorf("unexpected error: %v", err) + } + // Before taint manager work, the status has been replaced(maybe merge-patch replace). + node3.Status.Conditions = overrideNodeNewStatusConditions + _, err = fakeNodeHandler.UpdateStatus(ctx, node3, metav1.UpdateOptions{}) + if err != nil { + t.Errorf(err.Error()) + return + } + if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { + t.Errorf("unexpected error: %v", err) + } + nodeController.doNoExecuteTaintingPass(ctx) + node3, err = fakeNodeHandler.Get(ctx, "node3", metav1.GetOptions{}) + if err != nil { + t.Errorf("Can't get current node3...") + return + } + // We should not see any taint on the node(especially the Not-Ready taint with NoExecute effect). + if taintutils.TaintExists(node3.Spec.Taints, NotReadyTaintTemplate) || len(node3.Spec.Taints) > 0 { + t.Errorf("Found taint %v in %v, which should not be present", NotReadyTaintTemplate, node3.Spec.Taints) + } } // TestApplyNoExecuteTaintsToNodesEnqueueTwice ensures we taint every node with NoExecute even if enqueued twice