Merge pull request #123713 from aojea/missing_providerid

node-controller require providerID to initialize a node
This commit is contained in:
Kubernetes Prow Robot 2024-03-05 14:45:53 -08:00 committed by GitHub
commit 751e3e9745
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 223 additions and 68 deletions

View File

@ -226,6 +226,7 @@ func (cnc *CloudNodeController) processNextWorkItem() bool {
if err := cnc.syncHandler(key); err != nil {
// Put the item back on the workqueue to handle any transient errors.
cnc.workqueue.AddRateLimited(key)
klog.Infof("error syncing '%s': %v, requeuing", key, err)
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
}
@ -424,12 +425,8 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
klog.Infof("Initializing node %s with cloud provider", nodeName)
copyNode := curNode.DeepCopy()
providerID, err := cnc.getProviderID(ctx, copyNode)
if err != nil {
return fmt.Errorf("failed to get provider ID for node %s at cloudprovider: %v", nodeName, err)
}
instanceMetadata, err := cnc.getInstanceMetadata(ctx, providerID, copyNode)
instanceMetadata, err := cnc.getInstanceMetadata(ctx, copyNode)
if err != nil {
return fmt.Errorf("failed to get instance metadata for node %s: %v", nodeName, err)
}
@ -439,7 +436,7 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
return nil
}
nodeModifiers, err := cnc.getNodeModifiersFromCloudProvider(ctx, providerID, copyNode, instanceMetadata)
nodeModifiers, err := cnc.getNodeModifiersFromCloudProvider(ctx, copyNode, instanceMetadata)
if err != nil {
return fmt.Errorf("failed to get node modifiers from cloud provider: %v", err)
}
@ -510,16 +507,13 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
// loop, meaning they could get called multiple times.
func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(
ctx context.Context,
providerID string,
node *v1.Node,
instanceMeta *cloudprovider.InstanceMetadata,
) ([]nodeModifier, error) {
var nodeModifiers []nodeModifier
if node.Spec.ProviderID == "" {
if providerID != "" {
nodeModifiers = append(nodeModifiers, func(n *v1.Node) { n.Spec.ProviderID = providerID })
} else if instanceMeta.ProviderID != "" {
if instanceMeta.ProviderID != "" {
nodeModifiers = append(nodeModifiers, func(n *v1.Node) { n.Spec.ProviderID = instanceMeta.ProviderID })
}
}
@ -594,35 +588,33 @@ func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(
return nodeModifiers, nil
}
func (cnc *CloudNodeController) getProviderID(ctx context.Context, node *v1.Node) (string, error) {
if node.Spec.ProviderID != "" {
return node.Spec.ProviderID, nil
}
if _, ok := cnc.cloud.InstancesV2(); ok {
// We don't need providerID when we call InstanceMetadata for InstancesV2
return "", nil
}
providerID, err := cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(node.Name))
if err == cloudprovider.NotImplemented {
// if the cloud provider being used does not support provider IDs,
// we can safely continue since we will attempt to set node
// addresses given the node name in getNodeAddressesByProviderIDOrName
klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name)
return "", nil
}
// if the cloud provider being used supports provider IDs, we want
// to propagate the error so that we re-try in the future; if we
// do not, the taint will be removed, and this will not be retried
return providerID, err
}
// getInstanceMetadata get instance type and nodeAddresses, use Instances if InstancesV2 is off.
func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, providerID string, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
// getInstanceMetadata get providerdID, instance type and nodeAddresses, use Instances if InstancesV2 is off.
// ProviderID is expected to be available, but to keep backward compatibility,
// we should handle some scenarios where it can be missing. It returns an error
// if providerID is missing, except when is not implemented by GetInstanceProviderID.
func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
// kubelet can set the provider ID using the flag and is inmutable
providerID := node.Spec.ProviderID
// InstancesV2 require ProviderID to be present
if instancesV2, ok := cnc.cloud.InstancesV2(); instancesV2 != nil && ok {
return instancesV2.InstanceMetadata(ctx, node)
metadata, err := instancesV2.InstanceMetadata(ctx, node)
if err != nil {
return nil, err
}
// spec.ProviderID is required for multiple controllers, like loadbalancers, so we should not
// untaint the node until is set. Once it is set, the field is immutable, so no need to reconcile.
// We only set this value during initialization and is never reconciled, so if for some reason
// we are not able to set it, the instance will never be able to acquire it.
// Before external cloud providers were enabled by default, the field was set by the kubelet, and the
// node was created with the value.
// xref: https://issues.k8s.io/123024
if metadata != nil && metadata.ProviderID == "" {
if providerID == "" {
return metadata, fmt.Errorf("cloud provider does not set node provider ID for node %s", node.Name)
}
metadata.ProviderID = providerID
}
return metadata, nil
}
// If InstancesV2 not implement, use Instances.
@ -630,6 +622,26 @@ func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, provide
if !ok {
return nil, fmt.Errorf("failed to get instances from cloud provider")
}
var err error
if providerID == "" {
providerID, err = cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(node.Name))
if err != nil {
// This is the only case where ProviderID can be skipped
if errors.Is(err, cloudprovider.NotImplemented) {
// if the cloud provider being used does not support provider IDs,
// we can safely continue since we will attempt to set node
// addresses given the node name in getNodeAddressesByProviderIDOrName
klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name)
} else {
// if the cloud provider being used supports provider IDs, we want
// to propagate the error so that we re-try in the future; if we
// do not, the taint will be removed, and this will not be retried
return nil, err
}
}
}
nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, providerID, node.Name)
if err != nil {
return nil, err
@ -640,6 +652,7 @@ func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, provide
}
instanceMetadata := &cloudprovider.InstanceMetadata{
ProviderID: providerID,
InstanceType: instanceType,
NodeAddresses: nodeAddresses,
}

View File

@ -50,6 +50,7 @@ func Test_syncNode(t *testing.T) {
fakeCloud *fakecloud.Cloud
existingNode *v1.Node
updatedNode *v1.Node
expectedErr bool
}{
{
name: "node initialized with provider ID",
@ -545,7 +546,8 @@ func Test_syncNode(t *testing.T) {
},
},
{
name: "provided node IP address is not valid",
name: "provided node IP address is not valid",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
Addresses: []v1.NodeAddress{
@ -643,7 +645,8 @@ func Test_syncNode(t *testing.T) {
},
},
{
name: "provided node IP address is not present",
name: "provided node IP address is not present",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
Addresses: []v1.NodeAddress{
@ -835,8 +838,10 @@ func Test_syncNode(t *testing.T) {
},
},
},
{
name: "provider ID not implemented",
{ // for backward compatibility the cloud providers that does not implement
// providerID does not block the node initialization
name: "provider ID not implemented",
expectedErr: false,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{},
@ -1641,7 +1646,8 @@ func Test_syncNode(t *testing.T) {
},
},
{
name: "[instanceV2] provider ID not implemented",
name: "[instanceV2] provider ID not implemented",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{},
@ -1693,12 +1699,19 @@ func Test_syncNode(t *testing.T) {
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{},
Taints: []v1.Taint{
{
Key: cloudproviderapi.TaintExternalCloudProvider,
Value: "true",
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
},
{
name: "[instanceV2] error getting InstanceMetadata",
name: "[instanceV2] error getting InstanceMetadata",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{},
@ -1786,7 +1799,10 @@ func Test_syncNode(t *testing.T) {
w := eventBroadcaster.StartLogging(klog.Infof)
defer w.Stop()
cloudNodeController.syncNode(context.TODO(), test.existingNode.Name)
err := cloudNodeController.syncNode(context.TODO(), test.existingNode.Name)
if (err != nil) != test.expectedErr {
t.Fatalf("error got: %v expected: %v", err, test.expectedErr)
}
updatedNode, err := clientset.CoreV1().Nodes().Get(context.TODO(), test.existingNode.Name, metav1.GetOptions{})
if err != nil {
@ -1833,6 +1849,9 @@ func TestGCEConditionV2(t *testing.T) {
InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro",
},
ProviderID: map[types.NodeName]string{
types.NodeName("node0"): "fake://12334",
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
@ -2376,15 +2395,16 @@ func TestNodeAddressesNotUpdate(t *testing.T) {
}
}
func TestGetProviderID(t *testing.T) {
func TestGetInstanceMetadata(t *testing.T) {
tests := []struct {
name string
fakeCloud *fakecloud.Cloud
existingNode *v1.Node
expectedProviderID string
name string
fakeCloud *fakecloud.Cloud
existingNode *v1.Node
expectedMetadata *cloudprovider.InstanceMetadata
expectErr bool
}{
{
name: "node initialized with provider ID",
name: "cloud implemented with Instances and provider ID",
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{
@ -2393,6 +2413,9 @@ func TestGetProviderID(t *testing.T) {
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345",
},
ProviderID: map[types.NodeName]string{
types.NodeName("node0"): "fake://12345",
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
@ -2423,21 +2446,27 @@ func TestGetProviderID(t *testing.T) {
Effect: v1.TaintEffectNoSchedule,
},
},
ProviderID: "fake://12345",
},
},
expectedProviderID: "fake://12345",
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://12345",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with Instances (without providerID)",
name: "cloud implemented with Instances (providerID not implemented)",
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro",
types.NodeName("fake://12345"): "t1.micro",
},
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345",
ExtIDErr: map[types.NodeName]error{
types.NodeName("node0"): cloudprovider.NotImplemented,
},
Addresses: []v1.NodeAddress{
{
@ -2461,7 +2490,58 @@ func TestGetProviderID(t *testing.T) {
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
expectedProviderID: "fake://12345",
expectedMetadata: &cloudprovider.InstanceMetadata{
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with Instances (providerID not implemented) and node with providerID",
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro",
types.NodeName("fake://12345"): "t1.micro",
},
ExtIDErr: map[types.NodeName]error{
types.NodeName("node0"): cloudprovider.NotImplemented,
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
Address: "node0.cloud.internal",
},
{
Type: v1.NodeInternalIP,
Address: "10.0.0.1",
},
{
Type: v1.NodeExternalIP,
Address: "132.143.154.163",
},
},
Err: nil,
},
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "fake://asdasd",
},
},
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://asdasd",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with InstancesV2 (with providerID)",
@ -2474,6 +2554,9 @@ func TestGetProviderID(t *testing.T) {
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345",
},
ProviderID: map[types.NodeName]string{
types.NodeName("node0"): "fake://12345",
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
@ -2503,13 +2586,20 @@ func TestGetProviderID(t *testing.T) {
Effect: v1.TaintEffectNoSchedule,
},
},
ProviderID: "fake://12345",
},
},
expectedProviderID: "fake://12345",
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://12345",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with InstancesV2 (without providerID)",
{ // it will be requeueud later
name: "cloud implemented with InstancesV2 (without providerID)",
expectErr: true,
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{
@ -2550,7 +2640,59 @@ func TestGetProviderID(t *testing.T) {
},
},
},
expectedProviderID: "",
expectedMetadata: &cloudprovider.InstanceMetadata{
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with InstancesV2 (without providerID) and node with providerID",
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro",
types.NodeName("fake://12345"): "t1.micro",
},
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345",
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
Address: "node0.cloud.internal",
},
{
Type: v1.NodeInternalIP,
Address: "10.0.0.1",
},
{
Type: v1.NodeExternalIP,
Address: "132.143.154.163",
},
},
Err: nil,
},
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "fake://12345",
},
},
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://12345",
InstanceType: "t1.micro",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
}
@ -2560,13 +2702,13 @@ func TestGetProviderID(t *testing.T) {
cloud: test.fakeCloud,
}
providerID, err := cloudNodeController.getProviderID(context.TODO(), test.existingNode)
if err != nil {
t.Fatalf("error getting provider ID: %v", err)
metadata, err := cloudNodeController.getInstanceMetadata(context.TODO(), test.existingNode)
if (err != nil) != test.expectErr {
t.Fatalf("error expected %v got: %v", test.expectErr, err)
}
if !cmp.Equal(providerID, test.expectedProviderID) {
t.Errorf("unexpected providerID %s", cmp.Diff(providerID, test.expectedProviderID))
if !cmp.Equal(metadata, test.expectedMetadata) {
t.Errorf("unexpected metadata %s", cmp.Diff(metadata, test.expectedMetadata))
}
})
}