Merge pull request #123713 from aojea/missing_providerid

node-controller require providerID to initialize a node
This commit is contained in:
Kubernetes Prow Robot 2024-03-05 14:45:53 -08:00 committed by GitHub
commit 751e3e9745
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 223 additions and 68 deletions

View File

@ -226,6 +226,7 @@ func (cnc *CloudNodeController) processNextWorkItem() bool {
if err := cnc.syncHandler(key); err != nil { if err := cnc.syncHandler(key); err != nil {
// Put the item back on the workqueue to handle any transient errors. // Put the item back on the workqueue to handle any transient errors.
cnc.workqueue.AddRateLimited(key) cnc.workqueue.AddRateLimited(key)
klog.Infof("error syncing '%s': %v, requeuing", key, err)
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error()) return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
} }
@ -424,12 +425,8 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
klog.Infof("Initializing node %s with cloud provider", nodeName) klog.Infof("Initializing node %s with cloud provider", nodeName)
copyNode := curNode.DeepCopy() copyNode := curNode.DeepCopy()
providerID, err := cnc.getProviderID(ctx, copyNode)
if err != nil {
return fmt.Errorf("failed to get provider ID for node %s at cloudprovider: %v", nodeName, err)
}
instanceMetadata, err := cnc.getInstanceMetadata(ctx, providerID, copyNode) instanceMetadata, err := cnc.getInstanceMetadata(ctx, copyNode)
if err != nil { if err != nil {
return fmt.Errorf("failed to get instance metadata for node %s: %v", nodeName, err) return fmt.Errorf("failed to get instance metadata for node %s: %v", nodeName, err)
} }
@ -439,7 +436,7 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
return nil return nil
} }
nodeModifiers, err := cnc.getNodeModifiersFromCloudProvider(ctx, providerID, copyNode, instanceMetadata) nodeModifiers, err := cnc.getNodeModifiersFromCloudProvider(ctx, copyNode, instanceMetadata)
if err != nil { if err != nil {
return fmt.Errorf("failed to get node modifiers from cloud provider: %v", err) return fmt.Errorf("failed to get node modifiers from cloud provider: %v", err)
} }
@ -510,16 +507,13 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
// loop, meaning they could get called multiple times. // loop, meaning they could get called multiple times.
func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider( func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(
ctx context.Context, ctx context.Context,
providerID string,
node *v1.Node, node *v1.Node,
instanceMeta *cloudprovider.InstanceMetadata, instanceMeta *cloudprovider.InstanceMetadata,
) ([]nodeModifier, error) { ) ([]nodeModifier, error) {
var nodeModifiers []nodeModifier var nodeModifiers []nodeModifier
if node.Spec.ProviderID == "" { if node.Spec.ProviderID == "" {
if providerID != "" { if instanceMeta.ProviderID != "" {
nodeModifiers = append(nodeModifiers, func(n *v1.Node) { n.Spec.ProviderID = providerID })
} else if instanceMeta.ProviderID != "" {
nodeModifiers = append(nodeModifiers, func(n *v1.Node) { n.Spec.ProviderID = instanceMeta.ProviderID }) nodeModifiers = append(nodeModifiers, func(n *v1.Node) { n.Spec.ProviderID = instanceMeta.ProviderID })
} }
} }
@ -594,35 +588,33 @@ func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(
return nodeModifiers, nil return nodeModifiers, nil
} }
func (cnc *CloudNodeController) getProviderID(ctx context.Context, node *v1.Node) (string, error) { // getInstanceMetadata gets the providerID, instance type and nodeAddresses; it uses Instances if InstancesV2 is off.
if node.Spec.ProviderID != "" { // ProviderID is expected to be available, but to keep backward compatibility,
return node.Spec.ProviderID, nil // we should handle some scenarios where it can be missing. It returns an error
} // if providerID is missing, except when it is not implemented by GetInstanceProviderID.
func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
if _, ok := cnc.cloud.InstancesV2(); ok { // kubelet can set the provider ID using the flag, and the field is immutable
// We don't need providerID when we call InstanceMetadata for InstancesV2 providerID := node.Spec.ProviderID
return "", nil // InstancesV2 requires ProviderID to be present
}
providerID, err := cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(node.Name))
if err == cloudprovider.NotImplemented {
// if the cloud provider being used does not support provider IDs,
// we can safely continue since we will attempt to set node
// addresses given the node name in getNodeAddressesByProviderIDOrName
klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name)
return "", nil
}
// if the cloud provider being used supports provider IDs, we want
// to propagate the error so that we re-try in the future; if we
// do not, the taint will be removed, and this will not be retried
return providerID, err
}
// getInstanceMetadata get instance type and nodeAddresses, use Instances if InstancesV2 is off.
func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, providerID string, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
if instancesV2, ok := cnc.cloud.InstancesV2(); instancesV2 != nil && ok { if instancesV2, ok := cnc.cloud.InstancesV2(); instancesV2 != nil && ok {
return instancesV2.InstanceMetadata(ctx, node) metadata, err := instancesV2.InstanceMetadata(ctx, node)
if err != nil {
return nil, err
}
// spec.ProviderID is required by multiple controllers, like loadbalancers, so we should not
// untaint the node until it is set. Once it is set, the field is immutable, so there is no need to reconcile.
// We only set this value during initialization and it is never reconciled, so if for some reason
// we are not able to set it, the instance will never be able to acquire it.
// Before external cloud providers were enabled by default, the field was set by the kubelet, and the
// node was created with the value.
// xref: https://issues.k8s.io/123024
if metadata != nil && metadata.ProviderID == "" {
if providerID == "" {
return metadata, fmt.Errorf("cloud provider does not set node provider ID for node %s", node.Name)
}
metadata.ProviderID = providerID
}
return metadata, nil
} }
// If InstancesV2 is not implemented, use Instances. // If InstancesV2 is not implemented, use Instances.
@ -630,6 +622,26 @@ func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, provide
if !ok { if !ok {
return nil, fmt.Errorf("failed to get instances from cloud provider") return nil, fmt.Errorf("failed to get instances from cloud provider")
} }
var err error
if providerID == "" {
providerID, err = cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(node.Name))
if err != nil {
// This is the only case where ProviderID can be skipped
if errors.Is(err, cloudprovider.NotImplemented) {
// if the cloud provider being used does not support provider IDs,
// we can safely continue since we will attempt to set node
// addresses given the node name in getNodeAddressesByProviderIDOrName
klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name)
} else {
// if the cloud provider being used supports provider IDs, we want
// to propagate the error so that we re-try in the future; if we
// do not, the taint will be removed, and this will not be retried
return nil, err
}
}
}
nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, providerID, node.Name) nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, providerID, node.Name)
if err != nil { if err != nil {
return nil, err return nil, err
@ -640,6 +652,7 @@ func (cnc *CloudNodeController) getInstanceMetadata(ctx context.Context, provide
} }
instanceMetadata := &cloudprovider.InstanceMetadata{ instanceMetadata := &cloudprovider.InstanceMetadata{
ProviderID: providerID,
InstanceType: instanceType, InstanceType: instanceType,
NodeAddresses: nodeAddresses, NodeAddresses: nodeAddresses,
} }

View File

@ -50,6 +50,7 @@ func Test_syncNode(t *testing.T) {
fakeCloud *fakecloud.Cloud fakeCloud *fakecloud.Cloud
existingNode *v1.Node existingNode *v1.Node
updatedNode *v1.Node updatedNode *v1.Node
expectedErr bool
}{ }{
{ {
name: "node initialized with provider ID", name: "node initialized with provider ID",
@ -546,6 +547,7 @@ func Test_syncNode(t *testing.T) {
}, },
{ {
name: "provided node IP address is not valid", name: "provided node IP address is not valid",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false, EnableInstancesV2: false,
Addresses: []v1.NodeAddress{ Addresses: []v1.NodeAddress{
@ -644,6 +646,7 @@ func Test_syncNode(t *testing.T) {
}, },
{ {
name: "provided node IP address is not present", name: "provided node IP address is not present",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false, EnableInstancesV2: false,
Addresses: []v1.NodeAddress{ Addresses: []v1.NodeAddress{
@ -835,8 +838,10 @@ func Test_syncNode(t *testing.T) {
}, },
}, },
}, },
{ { // for backward compatibility, cloud providers that do not implement
// providerID do not block the node initialization
name: "provider ID not implemented", name: "provider ID not implemented",
expectedErr: false,
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false, EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{}, InstanceTypes: map[types.NodeName]string{},
@ -1642,6 +1647,7 @@ func Test_syncNode(t *testing.T) {
}, },
{ {
name: "[instanceV2] provider ID not implemented", name: "[instanceV2] provider ID not implemented",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true, EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{}, InstanceTypes: map[types.NodeName]string{},
@ -1693,12 +1699,19 @@ func Test_syncNode(t *testing.T) {
}, },
}, },
Spec: v1.NodeSpec{ Spec: v1.NodeSpec{
Taints: []v1.Taint{}, Taints: []v1.Taint{
{
Key: cloudproviderapi.TaintExternalCloudProvider,
Value: "true",
Effect: v1.TaintEffectNoSchedule,
},
},
}, },
}, },
}, },
{ {
name: "[instanceV2] error getting InstanceMetadata", name: "[instanceV2] error getting InstanceMetadata",
expectedErr: true,
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true, EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{}, InstanceTypes: map[types.NodeName]string{},
@ -1786,7 +1799,10 @@ func Test_syncNode(t *testing.T) {
w := eventBroadcaster.StartLogging(klog.Infof) w := eventBroadcaster.StartLogging(klog.Infof)
defer w.Stop() defer w.Stop()
cloudNodeController.syncNode(context.TODO(), test.existingNode.Name) err := cloudNodeController.syncNode(context.TODO(), test.existingNode.Name)
if (err != nil) != test.expectedErr {
t.Fatalf("error got: %v expected: %v", err, test.expectedErr)
}
updatedNode, err := clientset.CoreV1().Nodes().Get(context.TODO(), test.existingNode.Name, metav1.GetOptions{}) updatedNode, err := clientset.CoreV1().Nodes().Get(context.TODO(), test.existingNode.Name, metav1.GetOptions{})
if err != nil { if err != nil {
@ -1833,6 +1849,9 @@ func TestGCEConditionV2(t *testing.T) {
InstanceTypes: map[types.NodeName]string{ InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro", types.NodeName("node0"): "t1.micro",
}, },
ProviderID: map[types.NodeName]string{
types.NodeName("node0"): "fake://12334",
},
Addresses: []v1.NodeAddress{ Addresses: []v1.NodeAddress{
{ {
Type: v1.NodeHostName, Type: v1.NodeHostName,
@ -2376,15 +2395,16 @@ func TestNodeAddressesNotUpdate(t *testing.T) {
} }
} }
func TestGetProviderID(t *testing.T) { func TestGetInstanceMetadata(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
fakeCloud *fakecloud.Cloud fakeCloud *fakecloud.Cloud
existingNode *v1.Node existingNode *v1.Node
expectedProviderID string expectedMetadata *cloudprovider.InstanceMetadata
expectErr bool
}{ }{
{ {
name: "node initialized with provider ID", name: "cloud implemented with Instances and provider ID",
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false, EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{ InstanceTypes: map[types.NodeName]string{
@ -2393,6 +2413,9 @@ func TestGetProviderID(t *testing.T) {
ExtID: map[types.NodeName]string{ ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345", types.NodeName("node0"): "12345",
}, },
ProviderID: map[types.NodeName]string{
types.NodeName("node0"): "fake://12345",
},
Addresses: []v1.NodeAddress{ Addresses: []v1.NodeAddress{
{ {
Type: v1.NodeHostName, Type: v1.NodeHostName,
@ -2423,21 +2446,27 @@ func TestGetProviderID(t *testing.T) {
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}, },
}, },
},
},
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://12345", ProviderID: "fake://12345",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
}, },
}, },
expectedProviderID: "fake://12345",
}, },
{ {
name: "cloud implemented with Instances (without providerID)", name: "cloud implemented with Instances (providerID not implemented)",
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false, EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{ InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro", types.NodeName("node0"): "t1.micro",
types.NodeName("fake://12345"): "t1.micro", types.NodeName("fake://12345"): "t1.micro",
}, },
ExtID: map[types.NodeName]string{ ExtIDErr: map[types.NodeName]error{
types.NodeName("node0"): "12345", types.NodeName("node0"): cloudprovider.NotImplemented,
}, },
Addresses: []v1.NodeAddress{ Addresses: []v1.NodeAddress{
{ {
@ -2461,7 +2490,58 @@ func TestGetProviderID(t *testing.T) {
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
}, },
}, },
expectedProviderID: "fake://12345", expectedMetadata: &cloudprovider.InstanceMetadata{
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with Instances (providerID not implemented) and node with providerID",
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: false,
InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro",
types.NodeName("fake://12345"): "t1.micro",
},
ExtIDErr: map[types.NodeName]error{
types.NodeName("node0"): cloudprovider.NotImplemented,
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
Address: "node0.cloud.internal",
},
{
Type: v1.NodeInternalIP,
Address: "10.0.0.1",
},
{
Type: v1.NodeExternalIP,
Address: "132.143.154.163",
},
},
Err: nil,
},
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "fake://asdasd",
},
},
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://asdasd",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
}, },
{ {
name: "cloud implemented with InstancesV2 (with providerID)", name: "cloud implemented with InstancesV2 (with providerID)",
@ -2474,6 +2554,9 @@ func TestGetProviderID(t *testing.T) {
ExtID: map[types.NodeName]string{ ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345", types.NodeName("node0"): "12345",
}, },
ProviderID: map[types.NodeName]string{
types.NodeName("node0"): "fake://12345",
},
Addresses: []v1.NodeAddress{ Addresses: []v1.NodeAddress{
{ {
Type: v1.NodeHostName, Type: v1.NodeHostName,
@ -2503,13 +2586,20 @@ func TestGetProviderID(t *testing.T) {
Effect: v1.TaintEffectNoSchedule, Effect: v1.TaintEffectNoSchedule,
}, },
}, },
},
},
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://12345", ProviderID: "fake://12345",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
}, },
}, },
expectedProviderID: "fake://12345",
}, },
{ { // it will be requeued later
name: "cloud implemented with InstancesV2 (without providerID)", name: "cloud implemented with InstancesV2 (without providerID)",
expectErr: true,
fakeCloud: &fakecloud.Cloud{ fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true, EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{ InstanceTypes: map[types.NodeName]string{
@ -2550,7 +2640,59 @@ func TestGetProviderID(t *testing.T) {
}, },
}, },
}, },
expectedProviderID: "", expectedMetadata: &cloudprovider.InstanceMetadata{
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
},
{
name: "cloud implemented with InstancesV2 (without providerID) and node with providerID",
fakeCloud: &fakecloud.Cloud{
EnableInstancesV2: true,
InstanceTypes: map[types.NodeName]string{
types.NodeName("node0"): "t1.micro",
types.NodeName("fake://12345"): "t1.micro",
},
ExtID: map[types.NodeName]string{
types.NodeName("node0"): "12345",
},
Addresses: []v1.NodeAddress{
{
Type: v1.NodeHostName,
Address: "node0.cloud.internal",
},
{
Type: v1.NodeInternalIP,
Address: "10.0.0.1",
},
{
Type: v1.NodeExternalIP,
Address: "132.143.154.163",
},
},
Err: nil,
},
existingNode: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Spec: v1.NodeSpec{
ProviderID: "fake://12345",
},
},
expectedMetadata: &cloudprovider.InstanceMetadata{
ProviderID: "fake://12345",
InstanceType: "t1.micro",
NodeAddresses: []v1.NodeAddress{
{Type: "Hostname", Address: "node0.cloud.internal"},
{Type: "InternalIP", Address: "10.0.0.1"},
{Type: "ExternalIP", Address: "132.143.154.163"},
},
},
}, },
} }
@ -2560,13 +2702,13 @@ func TestGetProviderID(t *testing.T) {
cloud: test.fakeCloud, cloud: test.fakeCloud,
} }
providerID, err := cloudNodeController.getProviderID(context.TODO(), test.existingNode) metadata, err := cloudNodeController.getInstanceMetadata(context.TODO(), test.existingNode)
if err != nil { if (err != nil) != test.expectErr {
t.Fatalf("error getting provider ID: %v", err) t.Fatalf("error expected %v got: %v", test.expectErr, err)
} }
if !cmp.Equal(providerID, test.expectedProviderID) { if !cmp.Equal(metadata, test.expectedMetadata) {
t.Errorf("unexpected providerID %s", cmp.Diff(providerID, test.expectedProviderID)) t.Errorf("unexpected metadata %s", cmp.Diff(metadata, test.expectedMetadata))
} }
}) })
} }