From 46d522d0c8e94edb6d29606a28785e35259badd7 Mon Sep 17 00:00:00 2001 From: Andrew Sy Kim Date: Sun, 2 Aug 2020 09:34:53 -0400 Subject: [PATCH] cloud-node-lifecycle controller: add missing instancev2 calls for node exists and node shutdown A cloud provider should have the option to only implement the new InstanceV2 interface. Currently the cloud nodelifecycle controller only makes instancev1 calls when it should prefer instancev2 if supported and fallback to the existing instancev1 otherwise. Signed-off-by: Andrew Sy Kim --- .../controllers/node/node_controller.go | 4 +- .../node_lifecycle_controller.go | 27 +- .../node_lifecycle_controller_test.go | 231 ++++++++++++++++++ 3 files changed, 252 insertions(+), 10 deletions(-) diff --git a/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go b/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go index de553979500..7aa0e8f5b70 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go +++ b/staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go @@ -108,7 +108,9 @@ func NewCloudNodeController( klog.Infof("Sending events to api server.") eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) - if _, ok := cloud.Instances(); !ok { + _, instancesSupported := cloud.Instances() + _, instancesV2Supported := cloud.InstancesV2() + if !instancesSupported && !instancesV2Supported { return nil, errors.New("cloud provider does not support instances") } diff --git a/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go b/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go index 3df72855a0b..332ee8bc4d8 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go +++ b/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go @@ -85,7 +85,9 @@ func NewCloudNodeLifecycleController( return nil, errors.New("no cloud provider provided") } - if _, ok := cloud.Instances(); !ok { + _, instancesSupported := cloud.Instances() + _, instancesV2Supported := cloud.InstancesV2() + if !instancesSupported && !instancesV2Supported { return nil, errors.New("cloud provider does not support instances") } @@ -118,12 +120,6 @@ func (c *CloudNodeLifecycleController) Run(stopCh <-chan struct{}) { // or shutdown. If deleted, it deletes the node resource. If shutdown it // applies a shutdown taint to the node func (c *CloudNodeLifecycleController) MonitorNodes() { - instances, ok := c.cloud.Instances() - if !ok { - utilruntime.HandleError(fmt.Errorf("failed to get instances from cloud provider")) - return - } - nodes, err := c.nodeLister.List(labels.Everything()) if err != nil { klog.Errorf("error listing nodes from cache: %s", err) @@ -148,7 +144,7 @@ func (c *CloudNodeLifecycleController) MonitorNodes() { // At this point the node has NotReady status, we need to check if the node has been removed // from the cloud provider. If node cannot be found in cloudprovider, then delete the node - exists, err := ensureNodeExistsByProviderID(context.TODO(), instances, node) + exists, err := ensureNodeExistsByProviderID(context.TODO(), c.cloud, node) if err != nil { klog.Errorf("error checking if node %s exists: %v", node.Name, err) continue @@ -195,6 +191,10 @@ func (c *CloudNodeLifecycleController) MonitorNodes() { // shutdownInCloudProvider returns true if the node is shutdown on the cloud provider func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) { + if instanceV2, ok := cloud.InstancesV2(); ok { + return instanceV2.InstanceShutdown(ctx, node) + } + instances, ok := cloud.Instances() if !ok { return false, errors.New("cloud provider does not support instances") @@ -210,7 +210,16 @@ func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, // ensureNodeExistsByProviderID checks if the instance exists by the provider id, // If provider id in spec is empty it calls instanceId with node name to get provider id -func ensureNodeExistsByProviderID(ctx context.Context, instances cloudprovider.Instances, node *v1.Node) (bool, error) { +func ensureNodeExistsByProviderID(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) { + if instanceV2, ok := cloud.InstancesV2(); ok { + return instanceV2.InstanceExists(ctx, node) + } + + instances, ok := cloud.Instances() + if !ok { + return false, errors.New("instances interface not supported in the cloud provider") + } + providerID := node.Spec.ProviderID if providerID == "" { var err error diff --git a/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller_test.go b/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller_test.go index b12fde0e943..8059fa0379f 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller_test.go +++ b/staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller_test.go @@ -269,6 +269,237 @@ func Test_NodesDeleted(t *testing.T) { ExistsByProviderID: false, }, }, + { + name: "[instance2] node is not ready and does not exist", + existingNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedDeleted: true, + fakeCloud: &fakecloud.Cloud{ + EnableInstancesV2: true, + ExistsByProviderID: false, + }, + }, + { + name: "[instancev2] node is not ready and provider returns err", + existingNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "node0", + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "node0", + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedDeleted: false, + fakeCloud: &fakecloud.Cloud{ + EnableInstancesV2: true, + ExistsByProviderID: false, + ErrByProviderID: errors.New("err!"), + }, + }, + { + name: "[instancev2] node is not ready but still exists", + existingNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "node0", + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "node0", + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedDeleted: false, + fakeCloud: &fakecloud.Cloud{ + EnableInstancesV2: true, + ExistsByProviderID: true, + }, + }, + { + name: "[instancev2] node ready condition is unknown, node doesn't exist", + existingNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedDeleted: true, + fakeCloud: &fakecloud.Cloud{ + EnableInstancesV2: true, + ExistsByProviderID: false, + }, + }, + { + name: "[instancev2] node ready condition is unknown, node exists", + existingNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedDeleted: false, + fakeCloud: &fakecloud.Cloud{ + EnableInstancesV2: true, + NodeShutdown: false, + ExistsByProviderID: true, + ExtID: map[types.NodeName]string{ + types.NodeName("node0"): "foo://12345", + }, + }, + }, + { + name: "[instancev2] node is ready, but provider said it is deleted (maybe a bug in provider)", + existingNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "node0", + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "node0", + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + expectedDeleted: false, + fakeCloud: &fakecloud.Cloud{ + EnableInstancesV2: true, + ExistsByProviderID: false, + }, + }, } for _, testcase := range testcases {