cloud-node-lifecycle controller: add missing instancev2 calls for node exists and node shutdown

A cloud provider should have the option to only implement the new
InstanceV2 interface. Currently the cloud nodelifecycle controller only
makes instancev1 calls when it should prefer instancev2 if supported and
fallback to the existing instancev1 otherwise.

Signed-off-by: Andrew Sy Kim <kim.andrewsy@gmail.com>
This commit is contained in:
Andrew Sy Kim 2020-08-02 09:34:53 -04:00
parent 82bdba9907
commit 46d522d0c8
3 changed files with 252 additions and 10 deletions

View File

@ -108,7 +108,9 @@ func NewCloudNodeController(
klog.Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
if _, ok := cloud.Instances(); !ok {
_, instancesSupported := cloud.Instances()
_, instancesV2Supported := cloud.InstancesV2()
if !instancesSupported && !instancesV2Supported {
return nil, errors.New("cloud provider does not support instances")
}

View File

@ -85,7 +85,9 @@ func NewCloudNodeLifecycleController(
return nil, errors.New("no cloud provider provided")
}
if _, ok := cloud.Instances(); !ok {
_, instancesSupported := cloud.Instances()
_, instancesV2Supported := cloud.InstancesV2()
if !instancesSupported && !instancesV2Supported {
return nil, errors.New("cloud provider does not support instances")
}
@ -118,12 +120,6 @@ func (c *CloudNodeLifecycleController) Run(stopCh <-chan struct{}) {
// or shutdown. If deleted, it deletes the node resource. If shutdown it
// applies a shutdown taint to the node
func (c *CloudNodeLifecycleController) MonitorNodes() {
instances, ok := c.cloud.Instances()
if !ok {
utilruntime.HandleError(fmt.Errorf("failed to get instances from cloud provider"))
return
}
nodes, err := c.nodeLister.List(labels.Everything())
if err != nil {
klog.Errorf("error listing nodes from cache: %s", err)
@ -148,7 +144,7 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
// At this point the node has NotReady status, we need to check if the node has been removed
// from the cloud provider. If node cannot be found in cloudprovider, then delete the node
exists, err := ensureNodeExistsByProviderID(context.TODO(), instances, node)
exists, err := ensureNodeExistsByProviderID(context.TODO(), c.cloud, node)
if err != nil {
klog.Errorf("error checking if node %s exists: %v", node.Name, err)
continue
@ -195,6 +191,10 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
// shutdownInCloudProvider returns true if the node is shutdown on the cloud provider
func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
if instanceV2, ok := cloud.InstancesV2(); ok {
return instanceV2.InstanceShutdown(ctx, node)
}
instances, ok := cloud.Instances()
if !ok {
return false, errors.New("cloud provider does not support instances")
@ -210,7 +210,16 @@ func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface,
// ensureNodeExistsByProviderID checks if the instance exists by the provider id,
// If provider id in spec is empty it calls instanceId with node name to get provider id
func ensureNodeExistsByProviderID(ctx context.Context, instances cloudprovider.Instances, node *v1.Node) (bool, error) {
func ensureNodeExistsByProviderID(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
if instanceV2, ok := cloud.InstancesV2(); ok {
return instanceV2.InstanceExists(ctx, node)
}
instances, ok := cloud.Instances()
if !ok {
return false, errors.New("instances interface not supported in the cloud provider")
}
providerID := node.Spec.ProviderID
if providerID == "" {
var err error

View File

@ -269,6 +269,237 @@ func Test_NodesDeleted(t *testing.T) {
ExistsByProviderID: false,
},
},
{
name: "[instance2] node is not ready and does not exist",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
},
},
{
name: "[instancev2] node is not ready and provider returns err",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
ErrByProviderID: errors.New("err!"),
},
},
{
name: "[instancev2] node is not ready but still exists",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: true,
},
},
{
name: "[instancev2] node ready condition is unknown, node doesn't exist",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
},
},
{
name: "[instancev2] node ready condition is unknown, node exists",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
NodeShutdown: false,
ExistsByProviderID: true,
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "foo://12345",
},
},
},
{
name: "[instancev2] node is ready, but provider said it is deleted (maybe a bug in provider)",
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "node0",
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
expectedDeleted: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
ExistsByProviderID: false,
},
},
}
for _, testcase := range testcases {