Merge pull request #59968 from kubernetes/revert-59323-nodetaint

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Revert "add node shutdown taint"

Reverts kubernetes/kubernetes#59323

The node becomes unready but is never removed. I found the following in [kube-controller-manager.log](https://storage.googleapis.com/kubernetes-jenkins/logs/ci-kubernetes-e2e-gci-gce-autoscaling/6055/artifacts/bootstrap-e2e-master/cluster-autoscaler.log) from a test run, for one such node:

`E0216 01:14:27.084923       1 node_lifecycle_controller.go:686] Error determining if node bootstrap-e2e-minion-group-01b1 shutdown in cloud: failed to get instance ID from cloud provider: instance not found`

This goes on for the rest of the run (~6h). It looks like the node is stuck in the Unready state because of this check: https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/nodelifecycle/node_lifecycle_controller.go#L684. Previously there was no such check, and the node was removed.
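For illustration, here is a minimal, self-contained sketch of the failure mode (not the controller's actual code; the helper names and hard-coded results are invented for this example): because the shutdown probe is consulted first and its error aborts the loop iteration, the existence check that would delete the node never runs.

```go
// Toy simulation of the reverted control flow: an error from the shutdown
// probe makes the loop "continue", so the existence check that would delete
// the node is never reached. Hypothetical stand-ins, not the real controller.
package main

import (
	"errors"
	"fmt"
)

var errNotFound = errors.New("failed to get instance ID from cloud provider: instance not found")

// instanceShutdown mimics InstanceShutdownByProviderID for a deleted GCE instance.
func instanceShutdown(node string) (bool, error) { return false, errNotFound }

// instanceExists mimics the existence check that would trigger node deletion.
func instanceExists(node string) (bool, error) { return false, nil }

func main() {
	for pass := 1; pass <= 3; pass++ {
		// Reverted behaviour: probe shutdown state first, bail out on error.
		if _, err := instanceShutdown("node0"); err != nil {
			fmt.Printf("pass %d: %v -> continue, node kept\n", pass, err)
			continue // the existence check below is never reached
		}
		if exists, _ := instanceExists("node0"); !exists {
			fmt.Printf("pass %d: node deleted\n", pass)
		}
	}
}
```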

Reverting as this would affect all users attempting to resize their node groups on GCE.

```release-note
NONE
```
Kubernetes Submit Queue 2018-02-16 20:12:56 -08:00 committed by GitHub
commit 6d0b71740f
19 changed files with 13 additions and 393 deletions


@@ -148,8 +148,6 @@ type Instances interface {
     // InstanceExistsByProviderID returns true if the instance for the given provider id still is running.
     // If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
     InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error)
-    // InstanceShutdownByProviderID returns true if the instance is shutdown in cloudprovider
-    InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error)
 }

 // Route is a representation of an advanced routing rule.


@@ -1368,11 +1368,6 @@ func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return true, nil
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (c *Cloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the cloud provider ID of the node with the specified nodeName.
 func (c *Cloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
     // In the future it is possible to also return an endpoint as:


@@ -103,11 +103,6 @@ func (az *Cloud) isCurrentInstance(name types.NodeName) (bool, error) {
     return (metadataName == nodeName), err
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (az *Cloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the cloud provider ID of the specified instance.
 // Note that if the instance does not exist or is no longer running, we must return ("", cloudprovider.InstanceNotFound)
 func (az *Cloud) InstanceID(ctx context.Context, name types.NodeName) (string, error) {


@@ -158,8 +158,3 @@ func (cs *CSCloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return true, nil
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (cs *CSCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}


@@ -119,11 +119,6 @@ func (m *metadata) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return false, errors.New("InstanceExistsByProviderID not implemented")
 }

-// InstanceShutdownByProviderID returns if the instance is shutdown.
-func (m *metadata) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // GetZone returns the Zone containing the region that the program is running in.
 func (m *metadata) GetZone(ctx context.Context) (cloudprovider.Zone, error) {
     zone := cloudprovider.Zone{}


@@ -50,10 +50,8 @@ type FakeCloud struct {
     Exists bool
     Err error
     ExistsByProviderID bool
     ErrByProviderID error
-    NodeShutdown bool
-    ErrShutdownByProviderID error
     Calls []string
     Addresses []v1.NodeAddress
@@ -243,12 +241,6 @@ func (f *FakeCloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return f.ExistsByProviderID, f.ErrByProviderID
 }

-// InstanceShutdownByProviderID returns true if the instances is in safe state to detach volumes
-func (f *FakeCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    f.addCall("instance-shutdown-by-provider-id")
-    return f.NodeShutdown, f.ErrShutdownByProviderID
-}
-
 // List is a test-spy implementation of Instances.List.
 // It adds an entry "list" into the internal method call record.
 func (f *FakeCloud) List(filter string) ([]types.NodeName, error) {


@@ -190,11 +190,6 @@ func (gce *GCECloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return true, nil
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (gce *GCECloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the cloud provider ID of the node with the specified NodeName.
 func (gce *GCECloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
     instanceName := mapNodeNameToInstanceName(nodeName)


@@ -141,11 +141,6 @@ func (i *Instances) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return true, nil
 }

-// InstanceShutdownByProviderID returns true if the instances is in safe state to detach volumes
-func (i *Instances) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the kubelet's cloud provider ID.
 func (os *OpenStack) InstanceID() (string, error) {
     if len(os.localInstanceID) == 0 {
@@ -160,8 +155,7 @@ func (os *OpenStack) InstanceID() (string, error) {

 // InstanceID returns the cloud provider ID of the specified instance.
 func (i *Instances) InstanceID(ctx context.Context, name types.NodeName) (string, error) {
-    // we should fetch instanceid from all states instead of ACTIVE
-    srv, err := getServerByName(i.compute, name, false)
+    srv, err := getServerByName(i.compute, name, true)
     if err != nil {
         if err == ErrNotFound {
             return "", cloudprovider.InstanceNotFound


@@ -212,11 +212,6 @@ func (v *OVirtCloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return false, cloudprovider.NotImplemented
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (v *OVirtCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the cloud provider ID of the node with the specified NodeName.
 func (v *OVirtCloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
     name := mapNodeNameToInstanceName(nodeName)


@@ -477,11 +477,6 @@ func (pc *PCCloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return false, cloudprovider.NotImplemented
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (pc *PCCloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the cloud provider ID of the specified instance.
 func (pc *PCCloud) InstanceID(ctx context.Context, nodeName k8stypes.NodeName) (string, error) {
     name := string(nodeName)


@@ -609,11 +609,6 @@ func (vs *VSphere) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return false, err
 }

-// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
-func (vs *VSphere) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, cloudprovider.NotImplemented
-}
-
 // InstanceID returns the cloud provider ID of the node with the specified Name.
 func (vs *VSphere) InstanceID(ctx context.Context, nodeName k8stypes.NodeName) (string, error) {


@@ -37,23 +37,16 @@ import (
     clientretry "k8s.io/client-go/util/retry"
     nodeutilv1 "k8s.io/kubernetes/pkg/api/v1/node"
     "k8s.io/kubernetes/pkg/cloudprovider"
-    "k8s.io/kubernetes/pkg/controller"
     kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
     "k8s.io/kubernetes/pkg/scheduler/algorithm"
     nodeutil "k8s.io/kubernetes/pkg/util/node"
 )

-var (
-    UpdateNodeSpecBackoff = wait.Backoff{
-        Steps: 20,
-        Duration: 50 * time.Millisecond,
-        Jitter: 1.0}
-    ShutDownTaint = &v1.Taint{
-        Key: algorithm.TaintNodeShutdown,
-        Effect: v1.TaintEffectNoSchedule,
-    }
-)
+var UpdateNodeSpecBackoff = wait.Backoff{
+    Steps: 20,
+    Duration: 50 * time.Millisecond,
+    Jitter: 1.0,
+}

 type CloudNodeController struct {
     nodeInformer coreinformers.NodeInformer
@@ -250,28 +243,9 @@ func (cnc *CloudNodeController) MonitorNode() {
         // from the cloud provider. If node cannot be found in cloudprovider, then delete the node immediately
         if currentReadyCondition != nil {
             if currentReadyCondition.Status != v1.ConditionTrue {
-                // we need to check this first to get taint working in similar in all cloudproviders
-                // current problem is that shutdown nodes are not working in similar way ie. all cloudproviders
-                // does not delete node from kubernetes cluster when instance it is shutdown see issue #46442
-                exists, err := instances.InstanceShutdownByProviderID(context.TODO(), node.Spec.ProviderID)
-                if err != nil && err != cloudprovider.NotImplemented {
-                    glog.Errorf("Error getting data for node %s from cloud: %v", node.Name, err)
-                    continue
-                }
-                if exists {
-                    // if node is shutdown add shutdown taint
-                    err = controller.AddOrUpdateTaintOnNode(cnc.kubeClient, node.Name, ShutDownTaint)
-                    if err != nil {
-                        glog.Errorf("Error patching node taints: %v", err)
-                    }
-                    // Continue checking the remaining nodes since the current one is fine.
-                    continue
-                }
                 // Check with the cloud provider to see if the node still exists. If it
                 // doesn't, delete the node immediately.
-                exists, err = ensureNodeExistsByProviderIDOrExternalID(instances, node)
+                exists, err := ensureNodeExistsByProviderIDOrExternalID(instances, node)
                 if err != nil {
                     glog.Errorf("Error getting data for node %s from cloud: %v", node.Name, err)
                     continue
@@ -301,12 +275,6 @@ func (cnc *CloudNodeController) MonitorNode() {
                     }
                 }(node.Name)
-            } else {
-                // if taint exist remove taint
-                err = controller.RemoveTaintOffNode(cnc.kubeClient, node.Name, node, ShutDownTaint)
-                if err != nil {
-                    glog.Errorf("Error patching node taints: %v", err)
-                }
             }
         }
     }


@@ -148,115 +148,6 @@ func TestEnsureNodeExistsByProviderIDOrNodeName(t *testing.T) {
     }
 }

-func TestNodeShutdown(t *testing.T) {
-    testCases := []struct {
-        testName string
-        node *v1.Node
-        existsByProviderID bool
-        shutdown bool
-    }{
-        {
-            testName: "node shutdowned add taint",
-            existsByProviderID: true,
-            shutdown: true,
-            node: &v1.Node{
-                ObjectMeta: metav1.ObjectMeta{
-                    Name: "node0",
-                    CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-                },
-                Spec: v1.NodeSpec{
-                    ProviderID: "node0",
-                },
-                Status: v1.NodeStatus{
-                    Conditions: []v1.NodeCondition{
-                        {
-                            Type: v1.NodeReady,
-                            Status: v1.ConditionUnknown,
-                            LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                            LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                        },
-                    },
-                },
-            },
-        },
-        {
-            testName: "node started after shutdown remove taint",
-            existsByProviderID: true,
-            shutdown: false,
-            node: &v1.Node{
-                ObjectMeta: metav1.ObjectMeta{
-                    Name: "node0",
-                    CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-                },
-                Spec: v1.NodeSpec{
-                    ProviderID: "node0",
-                    Taints: []v1.Taint{
-                        {
-                            Key: algorithm.TaintNodeShutdown,
-                            Effect: v1.TaintEffectNoSchedule,
-                        },
-                    },
-                },
-                Status: v1.NodeStatus{
-                    Conditions: []v1.NodeCondition{
-                        {
-                            Type: v1.NodeReady,
-                            Status: v1.ConditionTrue,
-                            LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                            LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                        },
-                    },
-                },
-            },
-        },
-    }
-
-    for _, tc := range testCases {
-        t.Run(tc.testName, func(t *testing.T) {
-            fc := &fakecloud.FakeCloud{
-                ExistsByProviderID: tc.existsByProviderID,
-                NodeShutdown: tc.shutdown,
-            }
-            fnh := &testutil.FakeNodeHandler{
-                Existing: []*v1.Node{tc.node},
-                Clientset: fake.NewSimpleClientset(),
-                PatchWaitChan: make(chan struct{}),
-            }
-
-            factory := informers.NewSharedInformerFactory(fnh, controller.NoResyncPeriodFunc())
-
-            eventBroadcaster := record.NewBroadcaster()
-            cloudNodeController := &CloudNodeController{
-                kubeClient: fnh,
-                nodeInformer: factory.Core().V1().Nodes(),
-                cloud: fc,
-                nodeMonitorPeriod: 1 * time.Second,
-                recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"}),
-                nodeStatusUpdateFrequency: 1 * time.Second,
-            }
-            eventBroadcaster.StartLogging(glog.Infof)
-
-            cloudNodeController.Run()
-
-            select {
-            case <-fnh.PatchWaitChan:
-            case <-time.After(1 * time.Second):
-                t.Errorf("Timed out waiting %v for node to be updated", wait.ForeverTestTimeout)
-            }
-
-            assert.Equal(t, 1, len(fnh.UpdatedNodes), "Node was not updated")
-            if tc.shutdown {
-                assert.Equal(t, 1, len(fnh.UpdatedNodes[0].Spec.Taints), "Node Taint was not added")
-                assert.Equal(t, "node.cloudprovider.kubernetes.io/shutdown", fnh.UpdatedNodes[0].Spec.Taints[0].Key, "Node Taint key is not correct")
-            } else {
-                assert.Equal(t, 0, len(fnh.UpdatedNodes[0].Spec.Taints), "Node Taint was not removed after node is back in ready state")
-            }
-        })
-    }
-}
-
 // This test checks that the node is deleted when kubelet stops reporting
 // and cloud provider says node is gone
 func TestNodeDeleted(t *testing.T) {


@@ -79,11 +79,6 @@ var (
         Effect: v1.TaintEffectNoExecute,
     }

-    shutDownTaint = &v1.Taint{
-        Key: algorithm.TaintNodeShutdown,
-        Effect: v1.TaintEffectNoSchedule,
-    }
-
     nodeConditionToTaintKeyMap = map[v1.NodeConditionType]string{
         v1.NodeMemoryPressure: algorithm.TaintNodeMemoryPressure,
         v1.NodeOutOfDisk: algorithm.TaintNodeOutOfDisk,
@@ -156,10 +151,9 @@ type Controller struct {
     daemonSetStore extensionslisters.DaemonSetLister
     daemonSetInformerSynced cache.InformerSynced

     nodeLister corelisters.NodeLister
     nodeInformerSynced cache.InformerSynced

     nodeExistsInCloudProvider func(types.NodeName) (bool, error)
-    nodeShutdownInCloudProvider func(types.NodeName) (bool, error)

     recorder record.EventRecorder
@@ -245,9 +239,6 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
         nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
             return nodeutil.ExistsInCloudProvider(cloud, nodeName)
         },
-        nodeShutdownInCloudProvider: func(nodeName types.NodeName) (bool, error) {
-            return nodeutil.ShutdownInCloudProvider(cloud, nodeName)
-        },
         recorder: recorder,
         nodeMonitorPeriod: nodeMonitorPeriod,
         nodeStartupGracePeriod: nodeStartupGracePeriod,
@@ -662,11 +653,6 @@ func (nc *Controller) monitorNodeStatus() error {
                     glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
                 }
             }
-            // remove shutdown taint this is needed always depending do we use taintbased or not
-            err := nc.markNodeAsNotShutdown(node)
-            if err != nil {
-                glog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
-            }
         }

         // Report node event.
@@ -680,21 +666,7 @@ func (nc *Controller) monitorNodeStatus() error {
         // Check with the cloud provider to see if the node still exists. If it
         // doesn't, delete the node immediately.
         if currentReadyCondition.Status != v1.ConditionTrue && nc.cloud != nil {
-            // check is node shutdowned, if yes do not deleted it. Instead add taint
-            exists, err := nc.nodeShutdownInCloudProvider(types.NodeName(node.Name))
-            if err != nil && err != cloudprovider.NotImplemented {
-                glog.Errorf("Error determining if node %v shutdown in cloud: %v", node.Name, err)
-                continue
-            }
-            // node shutdown
-            if exists {
-                err = controller.AddOrUpdateTaintOnNode(nc.kubeClient, node.Name, shutDownTaint)
-                if err != nil {
-                    glog.Errorf("Error patching node taints: %v", err)
-                }
-                continue
-            }
-            exists, err = nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
+            exists, err := nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
             if err != nil {
                 glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
                 continue
@@ -1130,17 +1102,6 @@ func (nc *Controller) markNodeAsReachable(node *v1.Node) (bool, error) {
     return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Remove(node.Name), nil
 }

-func (nc *Controller) markNodeAsNotShutdown(node *v1.Node) error {
-    nc.evictorLock.Lock()
-    defer nc.evictorLock.Unlock()
-    err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, shutDownTaint)
-    if err != nil {
-        glog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
-        return err
-    }
-    return nil
-}
-
 // ComputeZoneState returns a slice of NodeReadyConditions for all Nodes in a given zone.
 // The zone is considered:
 // - fullyDisrupted if there're no Ready Nodes,


@@ -1360,118 +1360,6 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
     }
 }

-func TestCloudProviderNodeShutdown(t *testing.T) {
-    testCases := []struct {
-        testName string
-        node *v1.Node
-        shutdown bool
-    }{
-        {
-            testName: "node shutdowned add taint",
-            shutdown: true,
-            node: &v1.Node{
-                ObjectMeta: metav1.ObjectMeta{
-                    Name: "node0",
-                    CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-                },
-                Spec: v1.NodeSpec{
-                    ProviderID: "node0",
-                },
-                Status: v1.NodeStatus{
-                    Conditions: []v1.NodeCondition{
-                        {
-                            Type: v1.NodeReady,
-                            Status: v1.ConditionUnknown,
-                            LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                            LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                        },
-                    },
-                },
-            },
-        },
-        {
-            testName: "node started after shutdown remove taint",
-            shutdown: false,
-            node: &v1.Node{
-                ObjectMeta: metav1.ObjectMeta{
-                    Name: "node0",
-                    CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-                },
-                Spec: v1.NodeSpec{
-                    ProviderID: "node0",
-                    Taints: []v1.Taint{
-                        {
-                            Key: algorithm.TaintNodeShutdown,
-                            Effect: v1.TaintEffectNoSchedule,
-                        },
-                    },
-                },
-                Status: v1.NodeStatus{
-                    Conditions: []v1.NodeCondition{
-                        {
-                            Type: v1.NodeReady,
-                            Status: v1.ConditionTrue,
-                            LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                            LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-                        },
-                    },
-                },
-            },
-        },
-    }
-
-    for _, tc := range testCases {
-        t.Run(tc.testName, func(t *testing.T) {
-            fnh := &testutil.FakeNodeHandler{
-                Existing: []*v1.Node{tc.node},
-                Clientset: fake.NewSimpleClientset(),
-            }
-            nodeController, _ := newNodeLifecycleControllerFromClient(
-                nil,
-                fnh,
-                10*time.Minute,
-                testRateLimiterQPS,
-                testRateLimiterQPS,
-                testLargeClusterThreshold,
-                testUnhealthyThreshold,
-                testNodeMonitorGracePeriod,
-                testNodeStartupGracePeriod,
-                testNodeMonitorPeriod,
-                false)
-            nodeController.cloud = &fakecloud.FakeCloud{}
-            nodeController.now = func() metav1.Time { return metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC) }
-            nodeController.recorder = testutil.NewFakeRecorder()
-            nodeController.nodeShutdownInCloudProvider = func(nodeName types.NodeName) (bool, error) {
-                return tc.shutdown, nil
-            }
-
-            if err := nodeController.syncNodeStore(fnh); err != nil {
-                t.Errorf("unexpected error: %v", err)
-            }
-            if err := nodeController.monitorNodeStatus(); err != nil {
-                t.Errorf("unexpected error: %v", err)
-            }
-
-            if len(fnh.UpdatedNodes) != 1 {
-                t.Errorf("Node was not updated")
-            }
-            if tc.shutdown {
-                if len(fnh.UpdatedNodes[0].Spec.Taints) != 1 {
-                    t.Errorf("Node Taint was not added")
-                }
-                if fnh.UpdatedNodes[0].Spec.Taints[0].Key != "node.cloudprovider.kubernetes.io/shutdown" {
-                    t.Errorf("Node Taint key is not correct")
-                }
-            } else {
-                if len(fnh.UpdatedNodes[0].Spec.Taints) != 0 {
-                    t.Errorf("Node Taint was not removed after node is back in ready state")
-                }
-            }
-        })
-    }
-}
-
 // TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes
 // pods and the node when kubelet has not reported, and the cloudprovider says
 // the node is gone.
@@ -1516,9 +1404,6 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
     nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
         return false, nil
     }
-    nodeController.nodeShutdownInCloudProvider = func(nodeName types.NodeName) (bool, error) {
-        return false, nil
-    }
     // monitorNodeStatus should allow this node to be immediately deleted
     if err := nodeController.syncNodeStore(fnh); err != nil {
         t.Errorf("unexpected error: %v", err)
@@ -2357,9 +2242,6 @@ func TestNodeEventGeneration(t *testing.T) {
     nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
         return false, nil
     }
-    nodeController.nodeShutdownInCloudProvider = func(nodeName types.NodeName) (bool, error) {
-        return false, nil
-    }
     nodeController.now = func() metav1.Time { return fakeNow }
     fakeRecorder := testutil.NewFakeRecorder()
     nodeController.recorder = fakeRecorder


@@ -67,7 +67,6 @@ type FakeNodeHandler struct {
     // Synchronization
     lock sync.Mutex
     DeleteWaitChan chan struct{}
-    PatchWaitChan chan struct{}
 }

 // FakeLegacyHandler is a fake implemtation of CoreV1Interface.
@@ -271,9 +270,6 @@ func (m *FakeNodeHandler) Patch(name string, pt types.PatchType, data []byte, su
     m.lock.Lock()
     defer func() {
         m.RequestCount++
-        if m.PatchWaitChan != nil {
-            m.PatchWaitChan <- struct{}{}
-        }
         m.lock.Unlock()
     }()
     var nodeCopy v1.Node


@@ -187,21 +187,6 @@ func ExistsInCloudProvider(cloud cloudprovider.Interface, nodeName types.NodeName) (bool, error) {
     return true, nil
 }

-// ShutdownInCloudProvider returns true if the node is shutdowned in
-// cloud provider.
-func ShutdownInCloudProvider(cloud cloudprovider.Interface, nodeName types.NodeName) (bool, error) {
-    instances, ok := cloud.Instances()
-    if !ok {
-        return false, fmt.Errorf("%v", ErrCloudInstance)
-    }
-    providerID, err := cloudprovider.GetInstanceProviderID(context.TODO(), cloud, nodeName)
-    if err != nil {
-        return false, err
-    }
-    shutdown, err := instances.InstanceShutdownByProviderID(context.TODO(), providerID)
-    return shutdown, err
-}
-
 // RecordNodeEvent records a event related to a node.
 func RecordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype, reason, event string) {
     ref := &v1.ObjectReference{


@@ -61,7 +61,4 @@ const (
     // from the cloud-controller-manager intitializes this node, and then removes
     // the taint
     TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
-
-    // TaintNodeShutdown when node is shutdown in external cloud provider
-    TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
 )


@@ -732,10 +732,6 @@ func (instances *instances) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
     return false, errors.New("unimplemented")
 }

-func (instances *instances) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
-    return false, errors.New("unimplemented")
-}
-
 func (instances *instances) List(filter string) ([]types.NodeName, error) {
     return []types.NodeName{}, errors.New("Not implemented")
 }