Merge pull request #60009 from zetaab/nodetaint

Automatic merge from submit-queue (batch tested with PRs 63129, 63066, 60009, 63136, 63086). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. add node shutdown taint **What this PR does / why we need it**: we need node stopped taint in order to detach volumes immediately without waiting timeout. More info in issue ticket #58635 **Which issue(s) this PR fixes** Fixes #58635 **Special notes for your reviewer**: this was reverted, original PR https://github.com/kubernetes/kubernetes/pull/59323 Hopefully now bugs are fixed. However, I will execute more tests manually today. **Release note**: ```release-note NONE ```
2025-07-29 14:37:00 +00:00 · 2018-04-25 06:29:22 -07:00 · 2018-04-25 06:29:22 -07:00 · 02e3577ab1
commit 02e3577ab1
parent 9a841b3efd de967b717d
22 changed files with 381 additions and 5 deletions
--- a/pkg/cloudprovider/cloud.go
+++ b/pkg/cloudprovider/cloud.go
@ -145,6 +145,8 @@ type Instances interface {
 	// InstanceExistsByProviderID returns true if the instance for the given provider id still is running.
 	// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
 	InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error)
+	// InstanceShutdownByProviderID returns true if the instance is shutdown in cloudprovider
+	InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error)
 }

 // Route is a representation of an advanced routing rule.
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@ -1350,6 +1350,11 @@ func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID strin
 	return true, nil
 }

+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (c *Cloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // InstanceID returns the cloud provider ID of the node with the specified nodeName.
 func (c *Cloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
 	// In the future it is possible to also return an endpoint as:
--- a/pkg/cloudprovider/providers/azure/azure_instances.go
+++ b/pkg/cloudprovider/providers/azure/azure_instances.go
@ -115,6 +115,11 @@ func (az *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID stri
 	return true, nil
 }

+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (az *Cloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 func (az *Cloud) isCurrentInstance(name types.NodeName) (bool, error) {
 	nodeName := mapNodeNameToVMName(name)
 	metadataName, err := az.metadata.Text("instance/compute/name")
--- a/pkg/cloudprovider/providers/cloudstack/cloudstack_instances.go
+++ b/pkg/cloudprovider/providers/cloudstack/cloudstack_instances.go
@ -153,3 +153,8 @@ func (cs *CSCloud) InstanceExistsByProviderID(ctx context.Context, providerID st

 	return true, nil
 }
+
+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (cs *CSCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
--- a/pkg/cloudprovider/providers/cloudstack/metadata.go
+++ b/pkg/cloudprovider/providers/cloudstack/metadata.go
@ -114,6 +114,11 @@ func (m *metadata) InstanceExistsByProviderID(ctx context.Context, providerID st
 	return false, errors.New("InstanceExistsByProviderID not implemented")
 }

+// InstanceShutdownByProviderID returns if the instance is shutdown.
+func (m *metadata) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // GetZone returns the Zone containing the region that the program is running in.
 func (m *metadata) GetZone(ctx context.Context) (cloudprovider.Zone, error) {
 	zone := cloudprovider.Zone{}
--- a/pkg/cloudprovider/providers/fake/fake.go
+++ b/pkg/cloudprovider/providers/fake/fake.go
@ -50,8 +50,10 @@ type FakeCloud struct {
 	Exists bool
 	Err    error

-	ExistsByProviderID bool
-	ErrByProviderID    error
+	ExistsByProviderID      bool
+	ErrByProviderID         error
+	NodeShutdown            bool
+	ErrShutdownByProviderID error

 	Calls         []string
 	Addresses     []v1.NodeAddress
@ -233,6 +235,12 @@ func (f *FakeCloud) InstanceExistsByProviderID(ctx context.Context, providerID s
 	return f.ExistsByProviderID, f.ErrByProviderID
 }

+// InstanceShutdownByProviderID returns true if the instances is in safe state to detach volumes
+func (f *FakeCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	f.addCall("instance-shutdown-by-provider-id")
+	return f.NodeShutdown, f.ErrShutdownByProviderID
+}
+
 // List is a test-spy implementation of Instances.List.
 // It adds an entry "list" into the internal method call record.
 func (f *FakeCloud) List(filter string) ([]types.NodeName, error) {
--- a/pkg/cloudprovider/providers/gce/gce_instances.go
+++ b/pkg/cloudprovider/providers/gce/gce_instances.go
@ -141,6 +141,11 @@ func (gce *GCECloud) instanceByProviderID(providerID string) (*gceInstance, erro
 	return instance, nil
 }

+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (gce *GCECloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // InstanceTypeByProviderID returns the cloudprovider instance type of the node
 // with the specified unique providerID This method will not be called from the
 // node that is requesting this ID. i.e. metadata service and other local
--- a/pkg/cloudprovider/providers/openstack/openstack_instances.go
+++ b/pkg/cloudprovider/providers/openstack/openstack_instances.go
@ -134,6 +134,11 @@ func (i *Instances) InstanceExistsByProviderID(ctx context.Context, providerID s
 	return true, nil
 }

+// InstanceShutdownByProviderID returns true if the instances is in safe state to detach volumes
+func (i *Instances) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // InstanceID returns the kubelet's cloud provider ID.
 func (os *OpenStack) InstanceID() (string, error) {
 	if len(os.localInstanceID) == 0 {
--- a/pkg/cloudprovider/providers/ovirt/ovirt.go
+++ b/pkg/cloudprovider/providers/ovirt/ovirt.go
@ -202,6 +202,11 @@ func (v *OVirtCloud) InstanceExistsByProviderID(ctx context.Context, providerID
 	return false, cloudprovider.NotImplemented
 }

+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (v *OVirtCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // InstanceID returns the cloud provider ID of the node with the specified NodeName.
 func (v *OVirtCloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
 	name := mapNodeNameToInstanceName(nodeName)
--- a/pkg/cloudprovider/providers/photon/photon.go
+++ b/pkg/cloudprovider/providers/photon/photon.go
@ -460,6 +460,11 @@ func (pc *PCCloud) InstanceExistsByProviderID(ctx context.Context, providerID st
 	return false, cloudprovider.NotImplemented
 }

+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (pc *PCCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // InstanceID returns the cloud provider ID of the specified instance.
 func (pc *PCCloud) InstanceID(ctx context.Context, nodeName k8stypes.NodeName) (string, error) {
 	name := string(nodeName)
--- a/pkg/cloudprovider/providers/vsphere/vsphere.go
+++ b/pkg/cloudprovider/providers/vsphere/vsphere.go
@ -605,6 +605,11 @@ func (vs *VSphere) InstanceExistsByProviderID(ctx context.Context, providerID st
 	return false, err
 }

+// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
+func (vs *VSphere) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, cloudprovider.NotImplemented
+}
+
 // InstanceID returns the cloud provider ID of the node with the specified Name.
 func (vs *VSphere) InstanceID(ctx context.Context, nodeName k8stypes.NodeName) (string, error) {

--- a/pkg/controller/BUILD
+++ b/pkg/controller/BUILD
@ -56,6 +56,7 @@ go_library(
        "//pkg/apis/core:go_default_library",
        "//pkg/apis/core/install:go_default_library",
        "//pkg/apis/core/validation:go_default_library",
+        "//pkg/scheduler/algorithm:go_default_library",
        "//pkg/serviceaccount:go_default_library",
        "//pkg/util/hash:go_default_library",
        "//pkg/util/taints:go_default_library",
--- a/pkg/controller/cloud/BUILD
+++ b/pkg/controller/cloud/BUILD
@ -17,6 +17,7 @@ go_library(
        "//pkg/api/v1/node:go_default_library",
        "//pkg/cloudprovider:go_default_library",
        "//pkg/controller:go_default_library",
+        "//pkg/controller/util/node:go_default_library",
        "//pkg/kubelet/apis:go_default_library",
        "//pkg/scheduler/algorithm:go_default_library",
        "//pkg/util/node:go_default_library",
--- a/pkg/controller/cloud/node_controller.go
+++ b/pkg/controller/cloud/node_controller.go
@ -37,6 +37,8 @@ import (
 	clientretry "k8s.io/client-go/util/retry"
 	nodeutilv1 "k8s.io/kubernetes/pkg/api/v1/node"
 	"k8s.io/kubernetes/pkg/cloudprovider"
+	"k8s.io/kubernetes/pkg/controller"
+	nodectrlutil "k8s.io/kubernetes/pkg/controller/util/node"
 	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
 	"k8s.io/kubernetes/pkg/scheduler/algorithm"
 	nodeutil "k8s.io/kubernetes/pkg/util/node"
@ -243,6 +245,24 @@ func (cnc *CloudNodeController) MonitorNode() {
 		// from the cloud provider. If node cannot be found in cloudprovider, then delete the node immediately
 		if currentReadyCondition != nil {
 			if currentReadyCondition.Status != v1.ConditionTrue {
+				// we need to check this first to get taint working in similar in all cloudproviders
+				// current problem is that shutdown nodes are not working in similar way ie. all cloudproviders
+				// does not delete node from kubernetes cluster when instance it is shutdown see issue #46442
+				shutdown, err := nodectrlutil.ShutdownInCloudProvider(context.TODO(), cnc.cloud, node)
+				if err != nil {
+					glog.Errorf("Error getting data for node %s from cloud: %v", node.Name, err)
+				}
+
+				if shutdown && err == nil {
+					// if node is shutdown add shutdown taint
+					err = controller.AddOrUpdateTaintOnNode(cnc.kubeClient, node.Name, controller.ShutdownTaint)
+					if err != nil {
+						glog.Errorf("Error patching node taints: %v", err)
+					}
+					// Continue checking the remaining nodes since the current one is shutdown.
+					continue
+				}
+
 				// Check with the cloud provider to see if the node still exists. If it
 				// doesn't, delete the node immediately.
 				exists, err := ensureNodeExistsByProviderIDOrInstanceID(instances, node)
@ -275,6 +295,12 @@ func (cnc *CloudNodeController) MonitorNode() {
 					}
 				}(node.Name)

+			} else {
+				// if taint exist remove taint
+				err = controller.RemoveTaintOffNode(cnc.kubeClient, node.Name, node, controller.ShutdownTaint)
+				if err != nil {
+					glog.Errorf("Error patching node taints: %v", err)
+				}
 			}
 		}
 	}
--- a/pkg/controller/cloud/node_controller_test.go
+++ b/pkg/controller/cloud/node_controller_test.go
@ -148,6 +148,115 @@ func TestEnsureNodeExistsByProviderIDOrNodeName(t *testing.T) {

 }

+func TestNodeShutdown(t *testing.T) {
+
+	testCases := []struct {
+		testName           string
+		node               *v1.Node
+		existsByProviderID bool
+		shutdown           bool
+	}{
+		{
+			testName:           "node shutdowned add taint",
+			existsByProviderID: true,
+			shutdown:           true,
+			node: &v1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:              "node0",
+					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+				},
+				Spec: v1.NodeSpec{
+					ProviderID: "node0",
+				},
+				Status: v1.NodeStatus{
+					Conditions: []v1.NodeCondition{
+						{
+							Type:               v1.NodeReady,
+							Status:             v1.ConditionUnknown,
+							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						},
+					},
+				},
+			},
+		},
+		{
+			testName:           "node started after shutdown remove taint",
+			existsByProviderID: true,
+			shutdown:           false,
+			node: &v1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:              "node0",
+					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+				},
+				Spec: v1.NodeSpec{
+					ProviderID: "node0",
+					Taints: []v1.Taint{
+						{
+							Key:    algorithm.TaintNodeShutdown,
+							Effect: v1.TaintEffectNoSchedule,
+						},
+					},
+				},
+				Status: v1.NodeStatus{
+					Conditions: []v1.NodeCondition{
+						{
+							Type:               v1.NodeReady,
+							Status:             v1.ConditionTrue,
+							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						},
+					},
+				},
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.testName, func(t *testing.T) {
+			fc := &fakecloud.FakeCloud{
+				ExistsByProviderID: tc.existsByProviderID,
+				NodeShutdown:       tc.shutdown,
+			}
+			fnh := &testutil.FakeNodeHandler{
+				Existing:      []*v1.Node{tc.node},
+				Clientset:     fake.NewSimpleClientset(),
+				PatchWaitChan: make(chan struct{}),
+			}
+
+			factory := informers.NewSharedInformerFactory(fnh, controller.NoResyncPeriodFunc())
+
+			eventBroadcaster := record.NewBroadcaster()
+			cloudNodeController := &CloudNodeController{
+				kubeClient:                fnh,
+				nodeInformer:              factory.Core().V1().Nodes(),
+				cloud:                     fc,
+				nodeMonitorPeriod:         1 * time.Second,
+				recorder:                  eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"}),
+				nodeStatusUpdateFrequency: 1 * time.Second,
+			}
+			eventBroadcaster.StartLogging(glog.Infof)
+
+			cloudNodeController.Run()
+
+			select {
+			case <-fnh.PatchWaitChan:
+			case <-time.After(1 * time.Second):
+				t.Errorf("Timed out waiting %v for node to be updated", wait.ForeverTestTimeout)
+			}
+
+			assert.Equal(t, 1, len(fnh.UpdatedNodes), "Node was not updated")
+			if tc.shutdown {
+				assert.Equal(t, 1, len(fnh.UpdatedNodes[0].Spec.Taints), "Node Taint was not added")
+				assert.Equal(t, "node.cloudprovider.kubernetes.io/shutdown", fnh.UpdatedNodes[0].Spec.Taints[0].Key, "Node Taint key is not correct")
+			} else {
+				assert.Equal(t, 0, len(fnh.UpdatedNodes[0].Spec.Taints), "Node Taint was not removed after node is back in ready state")
+			}
+
+		})
+	}
+
+}
+
 // This test checks that the node is deleted when kubelet stops reporting
 // and cloud provider says node is gone
 func TestNodeDeleted(t *testing.T) {
--- a/pkg/controller/controller_utils.go
+++ b/pkg/controller/controller_utils.go
@ -45,6 +45,7 @@ import (
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	_ "k8s.io/kubernetes/pkg/apis/core/install"
 	"k8s.io/kubernetes/pkg/apis/core/validation"
+	"k8s.io/kubernetes/pkg/scheduler/algorithm"
 	hashutil "k8s.io/kubernetes/pkg/util/hash"
 	taintutils "k8s.io/kubernetes/pkg/util/taints"

@ -86,6 +87,11 @@ var UpdateTaintBackoff = wait.Backoff{
 	Jitter:   1.0,
 }

+var ShutdownTaint = &v1.Taint{
+	Key:    algorithm.TaintNodeShutdown,
+	Effect: v1.TaintEffectNoSchedule,
+}
+
 var (
 	KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
 )
--- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go
+++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go
@ -22,6 +22,7 @@ limitations under the License.
 package nodelifecycle

 import (
+	"context"
 	"fmt"
 	"sync"
 	"time"
@ -151,9 +152,10 @@ type Controller struct {
 	daemonSetStore          extensionslisters.DaemonSetLister
 	daemonSetInformerSynced cache.InformerSynced

-	nodeLister                corelisters.NodeLister
-	nodeInformerSynced        cache.InformerSynced
-	nodeExistsInCloudProvider func(types.NodeName) (bool, error)
+	nodeLister                  corelisters.NodeLister
+	nodeInformerSynced          cache.InformerSynced
+	nodeExistsInCloudProvider   func(types.NodeName) (bool, error)
+	nodeShutdownInCloudProvider func(context.Context, *v1.Node) (bool, error)

 	recorder record.EventRecorder

@ -239,6 +241,9 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
 		nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
 			return nodeutil.ExistsInCloudProvider(cloud, nodeName)
 		},
+		nodeShutdownInCloudProvider: func(ctx context.Context, node *v1.Node) (bool, error) {
+			return nodeutil.ShutdownInCloudProvider(ctx, cloud, node)
+		},
 		recorder:                    recorder,
 		nodeMonitorPeriod:           nodeMonitorPeriod,
 		nodeStartupGracePeriod:      nodeStartupGracePeriod,
@ -667,6 +672,11 @@ func (nc *Controller) monitorNodeStatus() error {
 						glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
 					}
 				}
+				// remove shutdown taint this is needed always depending do we use taintbased or not
+				err := nc.markNodeAsNotShutdown(node)
+				if err != nil {
+					glog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
+				}
 			}

 			// Report node event.
@ -680,6 +690,19 @@ func (nc *Controller) monitorNodeStatus() error {
 			// Check with the cloud provider to see if the node still exists. If it
 			// doesn't, delete the node immediately.
 			if currentReadyCondition.Status != v1.ConditionTrue && nc.cloud != nil {
+				// check is node shutdowned, if yes do not deleted it. Instead add taint
+				shutdown, err := nc.nodeShutdownInCloudProvider(context.TODO(), node)
+				if err != nil {
+					glog.Errorf("Error determining if node %v shutdown in cloud: %v", node.Name, err)
+				}
+				// node shutdown
+				if shutdown && err == nil {
+					err = controller.AddOrUpdateTaintOnNode(nc.kubeClient, node.Name, controller.ShutdownTaint)
+					if err != nil {
+						glog.Errorf("Error patching node taints: %v", err)
+					}
+					continue
+				}
 				exists, err := nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
 				if err != nil {
 					glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
@ -1118,6 +1141,17 @@ func (nc *Controller) markNodeAsReachable(node *v1.Node) (bool, error) {
 	return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Remove(node.Name), nil
 }

+func (nc *Controller) markNodeAsNotShutdown(node *v1.Node) error {
+	nc.evictorLock.Lock()
+	defer nc.evictorLock.Unlock()
+	err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, controller.ShutdownTaint)
+	if err != nil {
+		glog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
+		return err
+	}
+	return nil
+}
+
 // ComputeZoneState returns a slice of NodeReadyConditions for all Nodes in a given zone.
 // The zone is considered:
 // - fullyDisrupted if there're no Ready Nodes,
--- a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go
+++ b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go
@ -17,6 +17,7 @@ limitations under the License.
 package nodelifecycle

 import (
+	"context"
 	"strings"
 	"testing"
 	"time"
@ -1360,6 +1361,118 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 	}
 }

+func TestCloudProviderNodeShutdown(t *testing.T) {
+
+	testCases := []struct {
+		testName string
+		node     *v1.Node
+		shutdown bool
+	}{
+		{
+			testName: "node shutdowned add taint",
+			shutdown: true,
+			node: &v1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:              "node0",
+					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+				},
+				Spec: v1.NodeSpec{
+					ProviderID: "node0",
+				},
+				Status: v1.NodeStatus{
+					Conditions: []v1.NodeCondition{
+						{
+							Type:               v1.NodeReady,
+							Status:             v1.ConditionUnknown,
+							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						},
+					},
+				},
+			},
+		},
+		{
+			testName: "node started after shutdown remove taint",
+			shutdown: false,
+			node: &v1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:              "node0",
+					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+				},
+				Spec: v1.NodeSpec{
+					ProviderID: "node0",
+					Taints: []v1.Taint{
+						{
+							Key:    algorithm.TaintNodeShutdown,
+							Effect: v1.TaintEffectNoSchedule,
+						},
+					},
+				},
+				Status: v1.NodeStatus{
+					Conditions: []v1.NodeCondition{
+						{
+							Type:               v1.NodeReady,
+							Status:             v1.ConditionTrue,
+							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						},
+					},
+				},
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.testName, func(t *testing.T) {
+			fnh := &testutil.FakeNodeHandler{
+				Existing:  []*v1.Node{tc.node},
+				Clientset: fake.NewSimpleClientset(),
+			}
+			nodeController, _ := newNodeLifecycleControllerFromClient(
+				nil,
+				fnh,
+				10*time.Minute,
+				testRateLimiterQPS,
+				testRateLimiterQPS,
+				testLargeClusterThreshold,
+				testUnhealthyThreshold,
+				testNodeMonitorGracePeriod,
+				testNodeStartupGracePeriod,
+				testNodeMonitorPeriod,
+				false)
+			nodeController.cloud = &fakecloud.FakeCloud{}
+			nodeController.now = func() metav1.Time { return metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC) }
+			nodeController.recorder = testutil.NewFakeRecorder()
+			nodeController.nodeShutdownInCloudProvider = func(ctx context.Context, node *v1.Node) (bool, error) {
+				return tc.shutdown, nil
+			}
+
+			if err := nodeController.syncNodeStore(fnh); err != nil {
+				t.Errorf("unexpected error: %v", err)
+			}
+			if err := nodeController.monitorNodeStatus(); err != nil {
+				t.Errorf("unexpected error: %v", err)
+			}
+
+			if len(fnh.UpdatedNodes) != 1 {
+				t.Errorf("Node was not updated")
+			}
+			if tc.shutdown {
+				if len(fnh.UpdatedNodes[0].Spec.Taints) != 1 {
+					t.Errorf("Node Taint was not added")
+				}
+				if fnh.UpdatedNodes[0].Spec.Taints[0].Key != "node.cloudprovider.kubernetes.io/shutdown" {
+					t.Errorf("Node Taint key is not correct")
+				}
+			} else {
+				if len(fnh.UpdatedNodes[0].Spec.Taints) != 0 {
+					t.Errorf("Node Taint was not removed after node is back in ready state")
+				}
+			}
+		})
+	}
+
+}
+
 // TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes
 // pods and the node when kubelet has not reported, and the cloudprovider says
 // the node is gone.
@ -1404,6 +1517,9 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 	nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
 		return false, nil
 	}
+	nodeController.nodeShutdownInCloudProvider = func(ctx context.Context, node *v1.Node) (bool, error) {
+		return false, nil
+	}
 	// monitorNodeStatus should allow this node to be immediately deleted
 	if err := nodeController.syncNodeStore(fnh); err != nil {
 		t.Errorf("unexpected error: %v", err)
@ -2224,6 +2340,9 @@ func TestNodeEventGeneration(t *testing.T) {
 	nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
 		return false, nil
 	}
+	nodeController.nodeShutdownInCloudProvider = func(ctx context.Context, node *v1.Node) (bool, error) {
+		return false, nil
+	}
 	nodeController.now = func() metav1.Time { return fakeNow }
 	fakeRecorder := testutil.NewFakeRecorder()
 	nodeController.recorder = fakeRecorder
--- a/pkg/controller/testutil/test_utils.go
+++ b/pkg/controller/testutil/test_utils.go
@ -67,6 +67,7 @@ type FakeNodeHandler struct {
 	// Synchronization
 	lock           sync.Mutex
 	DeleteWaitChan chan struct{}
+	PatchWaitChan  chan struct{}
 }

 // FakeLegacyHandler is a fake implemtation of CoreV1Interface.
@ -270,6 +271,9 @@ func (m *FakeNodeHandler) Patch(name string, pt types.PatchType, data []byte, su
 	m.lock.Lock()
 	defer func() {
 		m.RequestCount++
+		if m.PatchWaitChan != nil {
+			m.PatchWaitChan <- struct{}{}
+		}
 		m.lock.Unlock()
 	}()
 	var nodeCopy v1.Node
--- a/pkg/controller/util/node/controller_utils.go
+++ b/pkg/controller/util/node/controller_utils.go
@ -187,6 +187,20 @@ func ExistsInCloudProvider(cloud cloudprovider.Interface, nodeName types.NodeNam
 	return true, nil
 }

+// ShutdownInCloudProvider returns true if the node is shutdowned in
+// cloud provider.
+func ShutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
+	instances, ok := cloud.Instances()
+	if !ok {
+		return false, fmt.Errorf("%v", ErrCloudInstance)
+	}
+	shutdown, err := instances.InstanceShutdownByProviderID(ctx, node.Spec.ProviderID)
+	if err == cloudprovider.NotImplemented {
+		return false, nil
+	}
+	return shutdown, err
+}
+
 // RecordNodeEvent records a event related to a node.
 func RecordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype, reason, event string) {
 	ref := &v1.ObjectReference{
--- a/pkg/scheduler/algorithm/well_known_labels.go
+++ b/pkg/scheduler/algorithm/well_known_labels.go
@ -71,4 +71,7 @@ const (
 	// from the cloud-controller-manager intitializes this node, and then removes
 	// the taint
 	TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
+
+	// TaintNodeShutdown when node is shutdown in external cloud provider
+	TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
 )
--- a/pkg/volume/cinder/attacher_test.go
+++ b/pkg/volume/cinder/attacher_test.go
@ -728,6 +728,10 @@ func (instances *instances) InstanceExistsByProviderID(ctx context.Context, prov
 	return false, errors.New("unimplemented")
 }

+func (instances *instances) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+	return false, errors.New("unimplemented")
+}
+
 func (instances *instances) List(filter string) ([]types.NodeName, error) {
 	return []types.NodeName{}, errors.New("Not implemented")
 }