diff --git a/pkg/controller/cloud/node_controller.go b/pkg/controller/cloud/node_controller.go index 22a2cc5cf81..99950b8e555 100644 --- a/pkg/controller/cloud/node_controller.go +++ b/pkg/controller/cloud/node_controller.go @@ -418,11 +418,16 @@ func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(ctx context.Co n.Spec.ProviderID = providerID } }) + } else if err == cloudprovider.NotImplemented { + // if the cloud provider being used does not support provider IDs, + // we can safely continue since we will attempt to set node + // addresses given the node name in getNodeAddressesByProviderIDOrName + klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name) } else { - // we should attempt to set providerID on node, but - // we can continue if we fail since we will attempt to set - // node addresses given the node name in getNodeAddressesByProviderIDOrName - klog.Errorf("failed to set node provider id: %v", err) + // if the cloud provider being used supports provider IDs, we want + // to propagate the error so that we re-try in the future; if we + // do not, the taint will be removed, and this will not be retried + return nil, err } } diff --git a/pkg/controller/cloud/node_controller_test.go b/pkg/controller/cloud/node_controller_test.go index 37470060f78..f762affe8a3 100644 --- a/pkg/controller/cloud/node_controller_test.go +++ b/pkg/controller/cloud/node_controller_test.go @@ -19,6 +19,7 @@ package cloud import ( "context" "errors" + "fmt" "reflect" "testing" "time" @@ -1152,3 +1153,168 @@ func TestNodeProviderIDAlreadySet(t *testing.T) { // CCM node controller should not overwrite provider if it's already set assert.Equal(t, "test-provider-id", fnh.UpdatedNodes[0].Spec.ProviderID, "Node ProviderID not set correctly") } + +// This test checks that a node's provider ID will subsequently be set after an error has occurred +func TestNodeProviderIDError(t *testing.T) { + fnh := &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + Spec: v1.NodeSpec{ + Taints: []v1.Taint{ + { + Key: schedulerapi.TaintExternalCloudProvider, + Value: "true", + Effect: v1.TaintEffectNoSchedule, + }, + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{}), + DeleteWaitChan: make(chan struct{}), + } + + factory := informers.NewSharedInformerFactory(fnh, 0) + + fakeCloud := &fakecloud.Cloud{ + InstanceTypes: map[types.NodeName]string{}, + Addresses: []v1.NodeAddress{ + { + Type: v1.NodeHostName, + Address: "node0.cloud.internal", + }, + { + Type: v1.NodeInternalIP, + Address: "10.0.0.1", + }, + { + Type: v1.NodeExternalIP, + Address: "132.143.154.163", + }, + }, + Provider: "test", + ExtID: map[types.NodeName]string{}, + ExtIDErr: map[types.NodeName]error{ + types.NodeName("node0"): fmt.Errorf("fake error"), + }, + Err: nil, + } + + eventBroadcaster := record.NewBroadcaster() + cloudNodeController := &CloudNodeController{ + kubeClient: fnh, + nodeInformer: factory.Core().V1().Nodes(), + cloud: fakeCloud, + nodeStatusUpdateFrequency: 1 * time.Second, + recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"}), + } + eventBroadcaster.StartLogging(klog.Infof) + + cloudNodeController.AddCloudNode(context.TODO(), fnh.Existing[0]) + + assert.Equal(t, 0, len(fnh.UpdatedNodes), "Node was unexpectedly updated") + + cloudNodeController.UpdateCloudNode(context.TODO(), nil, fnh.Existing[0]) + + assert.Equal(t, 0, len(fnh.UpdatedNodes), "Node was unexpectedly updated") + + fakeCloud.ExtID[types.NodeName("node0")] = "test-provider-id" + delete(fakeCloud.ExtIDErr, types.NodeName("node0")) + + cloudNodeController.UpdateCloudNode(context.TODO(), nil, fnh.Existing[0]) + + assert.Equal(t, 1, len(fnh.UpdatedNodes), "Node was not updated") + assert.Equal(t, "node0", fnh.UpdatedNodes[0].Name, "Node was not updated") + assert.Equal(t, "test://test-provider-id", fnh.UpdatedNodes[0].Spec.ProviderID, "Node ProviderID not set correctly") +} + +// This test checks that a NotImplemented error when getting a node's provider ID will not prevent removal of the taint +func TestNodeProviderIDNotImplemented(t *testing.T) { + fnh := &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + Spec: v1.NodeSpec{ + Taints: []v1.Taint{ + { + Key: schedulerapi.TaintExternalCloudProvider, + Value: "true", + Effect: v1.TaintEffectNoSchedule, + }, + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{}), + DeleteWaitChan: make(chan struct{}), + } + + factory := informers.NewSharedInformerFactory(fnh, 0) + + fakeCloud := &fakecloud.Cloud{ + InstanceTypes: map[types.NodeName]string{}, + Addresses: []v1.NodeAddress{ + { + Type: v1.NodeHostName, + Address: "node0.cloud.internal", + }, + { + Type: v1.NodeInternalIP, + Address: "10.0.0.1", + }, + { + Type: v1.NodeExternalIP, + Address: "132.143.154.163", + }, + }, + Provider: "test", + ExtID: map[types.NodeName]string{}, + ExtIDErr: map[types.NodeName]error{ + types.NodeName("node0"): cloudprovider.NotImplemented, + }, + Err: nil, + } + + eventBroadcaster := record.NewBroadcaster() + cloudNodeController := &CloudNodeController{ + kubeClient: fnh, + nodeInformer: factory.Core().V1().Nodes(), + cloud: fakeCloud, + nodeStatusUpdateFrequency: 1 * time.Second, + recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"}), + } + eventBroadcaster.StartLogging(klog.Infof) + + cloudNodeController.AddCloudNode(context.TODO(), fnh.Existing[0]) + + assert.Equal(t, 1, len(fnh.UpdatedNodes), "Node was not updated") + assert.Equal(t, "node0", fnh.UpdatedNodes[0].Name, "Node was not updated") + assert.Equal(t, "", fnh.UpdatedNodes[0].Spec.ProviderID, "Node ProviderID set to unexpected value") +} diff --git a/staging/src/k8s.io/cloud-provider/cloud.go b/staging/src/k8s.io/cloud-provider/cloud.go index 38703f10302..b006c726cd6 100644 --- a/staging/src/k8s.io/cloud-provider/cloud.go +++ b/staging/src/k8s.io/cloud-provider/cloud.go @@ -98,6 +98,10 @@ func GetInstanceProviderID(ctx context.Context, cloud Interface, nodeName types. } instanceID, err := instances.InstanceID(ctx, nodeName) if err != nil { + if err == NotImplemented { + return "", err + } + return "", fmt.Errorf("failed to get instance ID from cloud provider: %v", err) } return cloud.ProviderName() + "://" + instanceID, nil diff --git a/staging/src/k8s.io/cloud-provider/fake/fake.go b/staging/src/k8s.io/cloud-provider/fake/fake.go index 10e97285a69..b473ceca6b3 100644 --- a/staging/src/k8s.io/cloud-provider/fake/fake.go +++ b/staging/src/k8s.io/cloud-provider/fake/fake.go @@ -68,6 +68,7 @@ type Cloud struct { Addresses []v1.NodeAddress addressesMux sync.Mutex ExtID map[types.NodeName]string + ExtIDErr map[types.NodeName]error InstanceTypes map[types.NodeName]string Machines []types.NodeName NodeResources *v1.NodeResources @@ -252,9 +253,17 @@ func (f *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID string return f.Addresses, f.Err } -// InstanceID returns the cloud provider ID of the node with the specified Name. +// InstanceID returns the cloud provider ID of the node with the specified Name, unless an entry +// for the node exists in ExtIDError, in which case it returns the desired error (to facilitate +// testing of error handling). func (f *Cloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) { f.addCall("instance-id") + + err, ok := f.ExtIDErr[nodeName] + if ok { + return "", err + } + return f.ExtID[nodeName], nil }