cloud-node-lifecycle controller: add missing instancev2 calls for node exists and node shutdown

A cloud provider should have the option to only implement the new
InstanceV2 interface. Currently the cloud nodelifecycle controller only
makes instancev1 calls when it should prefer instancev2 if supported and
fallback to the existing instancev1 otherwise.

Signed-off-by: Andrew Sy Kim <kim.andrewsy@gmail.com>
This commit is contained in:
Andrew Sy Kim 2020-08-02 09:34:53 -04:00
parent 82bdba9907
commit 46d522d0c8
3 changed files with 252 additions and 10 deletions

View File

@ -108,7 +108,9 @@ func NewCloudNodeController(
klog.Infof("Sending events to api server.") klog.Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
if _, ok := cloud.Instances(); !ok { _, instancesSupported := cloud.Instances()
_, instancesV2Supported := cloud.InstancesV2()
if !instancesSupported && !instancesV2Supported {
return nil, errors.New("cloud provider does not support instances") return nil, errors.New("cloud provider does not support instances")
} }

View File

@ -85,7 +85,9 @@ func NewCloudNodeLifecycleController(
return nil, errors.New("no cloud provider provided") return nil, errors.New("no cloud provider provided")
} }
if _, ok := cloud.Instances(); !ok { _, instancesSupported := cloud.Instances()
_, instancesV2Supported := cloud.InstancesV2()
if !instancesSupported && !instancesV2Supported {
return nil, errors.New("cloud provider does not support instances") return nil, errors.New("cloud provider does not support instances")
} }
@ -118,12 +120,6 @@ func (c *CloudNodeLifecycleController) Run(stopCh <-chan struct{}) {
// or shutdown. If deleted, it deletes the node resource. If shutdown it // or shutdown. If deleted, it deletes the node resource. If shutdown it
// applies a shutdown taint to the node // applies a shutdown taint to the node
func (c *CloudNodeLifecycleController) MonitorNodes() { func (c *CloudNodeLifecycleController) MonitorNodes() {
instances, ok := c.cloud.Instances()
if !ok {
utilruntime.HandleError(fmt.Errorf("failed to get instances from cloud provider"))
return
}
nodes, err := c.nodeLister.List(labels.Everything()) nodes, err := c.nodeLister.List(labels.Everything())
if err != nil { if err != nil {
klog.Errorf("error listing nodes from cache: %s", err) klog.Errorf("error listing nodes from cache: %s", err)
@ -148,7 +144,7 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
// At this point the node has NotReady status, we need to check if the node has been removed // At this point the node has NotReady status, we need to check if the node has been removed
// from the cloud provider. If node cannot be found in cloudprovider, then delete the node // from the cloud provider. If node cannot be found in cloudprovider, then delete the node
exists, err := ensureNodeExistsByProviderID(context.TODO(), instances, node) exists, err := ensureNodeExistsByProviderID(context.TODO(), c.cloud, node)
if err != nil { if err != nil {
klog.Errorf("error checking if node %s exists: %v", node.Name, err) klog.Errorf("error checking if node %s exists: %v", node.Name, err)
continue continue
@ -195,6 +191,10 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
// shutdownInCloudProvider returns true if the node is shutdown on the cloud provider // shutdownInCloudProvider returns true if the node is shutdown on the cloud provider
func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) { func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
if instanceV2, ok := cloud.InstancesV2(); ok {
return instanceV2.InstanceShutdown(ctx, node)
}
instances, ok := cloud.Instances() instances, ok := cloud.Instances()
if !ok { if !ok {
return false, errors.New("cloud provider does not support instances") return false, errors.New("cloud provider does not support instances")
@ -210,7 +210,16 @@ func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface,
// ensureNodeExistsByProviderID checks if the instance exists by the provider id, // ensureNodeExistsByProviderID checks if the instance exists by the provider id,
// If provider id in spec is empty it calls instanceId with node name to get provider id // If provider id in spec is empty it calls instanceId with node name to get provider id
func ensureNodeExistsByProviderID(ctx context.Context, instances cloudprovider.Instances, node *v1.Node) (bool, error) { func ensureNodeExistsByProviderID(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
if instanceV2, ok := cloud.InstancesV2(); ok {
return instanceV2.InstanceExists(ctx, node)
}
instances, ok := cloud.Instances()
if !ok {
return false, errors.New("instances interface not supported in the cloud provider")
}
providerID := node.Spec.ProviderID providerID := node.Spec.ProviderID
if providerID == "" { if providerID == "" {
var err error var err error

View File

@ -269,6 +269,237 @@ func Test_NodesDeleted(t *testing.T) {
ExistsByProviderID: false, ExistsByProviderID: false,
}, },
}, },
{
name: "[instance2] node is not ready and does not exist",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
},
},
{
name: "[instancev2] node is not ready and provider returns err",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
ErrByProviderID: errors.New("err!"),
},
},
{
name: "[instancev2] node is not ready but still exists",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: true,
},
},
{
name: "[instancev2] node ready condition is unknown, node doesn't exist",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
},
},
{
name: "[instancev2] node ready condition is unknown, node exists",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
NodeShutdown: false,
ExistsByProviderID: true,
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "foo://12345",
},
},
},
{
name: "[instancev2] node is ready, but provider said it is deleted (maybe a bug in provider)",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
},
},
} }
for _, testcase := range testcases { for _, testcase := range testcases {