Merge pull request #87043 from zjs/topic/propagate-providerid-errors

Ensure a provider ID is set on a node if expected
This commit is contained in:
Kubernetes Prow Robot 2020-01-10 14:33:50 -08:00 committed by GitHub
commit 240782c7f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 189 additions and 5 deletions

View File

@ -418,11 +418,16 @@ func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(ctx context.Co
n.Spec.ProviderID = providerID
}
})
} else if err == cloudprovider.NotImplemented {
// if the cloud provider being used does not support provider IDs,
// we can safely continue since we will attempt to set node
// addresses given the node name in getNodeAddressesByProviderIDOrName
klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name)
} else {
// we should attempt to set providerID on node, but
// we can continue if we fail since we will attempt to set
// node addresses given the node name in getNodeAddressesByProviderIDOrName
klog.Errorf("failed to set node provider id: %v", err)
// if the cloud provider being used supports provider IDs, we want
// to propagate the error so that we re-try in the future; if we
// do not, the taint will be removed, and this will not be retried
return nil, err
}
}

View File

@ -19,6 +19,7 @@ package cloud
import (
"context"
"errors"
"fmt"
"reflect"
"testing"
"time"
@ -1152,3 +1153,168 @@ func TestNodeProviderIDAlreadySet(t *testing.T) {
// CCM node controller should not overwrite provider if it's already set
assert.Equal(t, "test-provider-id", fnh.UpdatedNodes[0].Spec.ProviderID, "Node ProviderID not set correctly")
}
// This test checks that a node's provider ID will subsequently be set after an error has occurred
func TestNodeProviderIDError(t *testing.T) {
fnh := &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: schedulerapi.TaintExternalCloudProvider,
Value: "true",
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&v1.PodList{}),
DeleteWaitChan: make(chan struct{}),
}
factory := informers.NewSharedInformerFactory(fnh, 0)
fakeCloud := &fakecloud.Cloud{
InstanceTypes: map[types.NodeName]string{},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
Address: "node0.cloud.internal",
},
{
Type: v1.NodeInternalIP,
Address: "10.0.0.1",
},
{
Type: v1.NodeExternalIP,
Address: "132.143.154.163",
},
},
Provider: "test",
ExtID: map[types.NodeName]string{},
ExtIDErr: map[types.NodeName]error{
types.NodeName("node0"): fmt.Errorf("fake error"),
},
Err: nil,
}
eventBroadcaster := record.NewBroadcaster()
cloudNodeController := &CloudNodeController{
kubeClient: fnh,
nodeInformer: factory.Core().V1().Nodes(),
cloud: fakeCloud,
nodeStatusUpdateFrequency: 1 * time.Second,
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"}),
}
eventBroadcaster.StartLogging(klog.Infof)
cloudNodeController.AddCloudNode(context.TODO(), fnh.Existing[0])
assert.Equal(t, 0, len(fnh.UpdatedNodes), "Node was unexpectedly updated")
cloudNodeController.UpdateCloudNode(context.TODO(), nil, fnh.Existing[0])
assert.Equal(t, 0, len(fnh.UpdatedNodes), "Node was unexpectedly updated")
fakeCloud.ExtID[types.NodeName("node0")] = "test-provider-id"
delete(fakeCloud.ExtIDErr, types.NodeName("node0"))
cloudNodeController.UpdateCloudNode(context.TODO(), nil, fnh.Existing[0])
assert.Equal(t, 1, len(fnh.UpdatedNodes), "Node was not updated")
assert.Equal(t, "node0", fnh.UpdatedNodes[0].Name, "Node was not updated")
assert.Equal(t, "test://test-provider-id", fnh.UpdatedNodes[0].Spec.ProviderID, "Node ProviderID not set correctly")
}
// This test checks that a NotImplemented error when getting a node's provider ID will not prevent removal of the taint
func TestNodeProviderIDNotImplemented(t *testing.T) {
fnh := &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: schedulerapi.TaintExternalCloudProvider,
Value: "true",
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&v1.PodList{}),
DeleteWaitChan: make(chan struct{}),
}
factory := informers.NewSharedInformerFactory(fnh, 0)
fakeCloud := &fakecloud.Cloud{
InstanceTypes: map[types.NodeName]string{},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
Address: "node0.cloud.internal",
},
{
Type: v1.NodeInternalIP,
Address: "10.0.0.1",
},
{
Type: v1.NodeExternalIP,
Address: "132.143.154.163",
},
},
Provider: "test",
ExtID: map[types.NodeName]string{},
ExtIDErr: map[types.NodeName]error{
types.NodeName("node0"): cloudprovider.NotImplemented,
},
Err: nil,
}
eventBroadcaster := record.NewBroadcaster()
cloudNodeController := &CloudNodeController{
kubeClient: fnh,
nodeInformer: factory.Core().V1().Nodes(),
cloud: fakeCloud,
nodeStatusUpdateFrequency: 1 * time.Second,
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"}),
}
eventBroadcaster.StartLogging(klog.Infof)
cloudNodeController.AddCloudNode(context.TODO(), fnh.Existing[0])
assert.Equal(t, 1, len(fnh.UpdatedNodes), "Node was not updated")
assert.Equal(t, "node0", fnh.UpdatedNodes[0].Name, "Node was not updated")
assert.Equal(t, "", fnh.UpdatedNodes[0].Spec.ProviderID, "Node ProviderID set to unexpected value")
}

View File

@ -98,6 +98,10 @@ func GetInstanceProviderID(ctx context.Context, cloud Interface, nodeName types.
}
instanceID, err := instances.InstanceID(ctx, nodeName)
if err != nil {
if err == NotImplemented {
return "", err
}
return "", fmt.Errorf("failed to get instance ID from cloud provider: %v", err)
}
return cloud.ProviderName() + "://" + instanceID, nil

View File

@ -68,6 +68,7 @@ type Cloud struct {
Addresses []v1.NodeAddress
addressesMux sync.Mutex
ExtID map[types.NodeName]string
ExtIDErr map[types.NodeName]error
InstanceTypes map[types.NodeName]string
Machines []types.NodeName
NodeResources *v1.NodeResources
@ -252,9 +253,17 @@ func (f *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID string
return f.Addresses, f.Err
}
// InstanceID returns the cloud provider ID of the node with the specified Name.
// InstanceID returns the cloud provider ID of the node with the specified Name, unless an entry
// for the node exists in ExtIDError, in which case it returns the desired error (to facilitate
// testing of error handling).
func (f *Cloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
f.addCall("instance-id")
err, ok := f.ExtIDErr[nodeName]
if ok {
return "", err
}
return f.ExtID[nodeName], nil
}