Merge pull request #90749 from andyzhangx/dangling-error

fix: azure disk dangling attach issue on VMSS which would cause API throttling
This commit is contained in:
Kubernetes Prow Robot 2020-05-05 23:43:09 -07:00 committed by GitHub
commit 4d813c1360
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 56 additions and 24 deletions

View File

@ -121,6 +121,11 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
diskEncryptionSetID := "" diskEncryptionSetID := ""
writeAcceleratorEnabled := false writeAcceleratorEnabled := false
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
return -1, err
}
if isManagedDisk { if isManagedDisk {
diskName := path.Base(diskURI) diskName := path.Base(diskURI)
resourceGroup, err := getResourceGroupFromDiskURI(diskURI) resourceGroup, err := getResourceGroupFromDiskURI(diskURI)
@ -140,9 +145,12 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
attachErr := fmt.Sprintf( attachErr := fmt.Sprintf(
"disk(%s) already attached to node(%s), could not be attached to node(%s)", "disk(%s) already attached to node(%s), could not be attached to node(%s)",
diskURI, *disk.ManagedBy, nodeName) diskURI, *disk.ManagedBy, nodeName)
attachedNode := path.Base(*disk.ManagedBy) attachedNode, err := vmset.GetNodeNameByProviderID(*disk.ManagedBy)
if err != nil {
return -1, err
}
klog.V(2).Infof("found dangling volume %s attached to node %s", diskURI, attachedNode) klog.V(2).Infof("found dangling volume %s attached to node %s", diskURI, attachedNode)
danglingErr := volerr.NewDanglingError(attachErr, types.NodeName(attachedNode), "") danglingErr := volerr.NewDanglingError(attachErr, attachedNode, "")
return -1, danglingErr return -1, danglingErr
} }
@ -157,11 +165,6 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
} }
} }
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
return -1, err
}
instanceid, err := c.cloud.InstanceID(context.TODO(), nodeName) instanceid, err := c.cloud.InstanceID(context.TODO(), nodeName)
if err != nil { if err != nil {
klog.Warningf("failed to get azure instance id (%v) for node %s", err, nodeName) klog.Warningf("failed to get azure instance id (%v) for node %s", err, nodeName)

View File

@ -405,7 +405,7 @@ func (az *Cloud) getServiceLoadBalancerStatus(service *v1.Service, lb *network.L
if pipID == nil { if pipID == nil {
return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress ID is Nil", serviceName, *lb.Name) return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress ID is Nil", serviceName, *lb.Name)
} }
pipName, err := getLastSegment(*pipID) pipName, err := getLastSegment(*pipID, "/")
if err != nil { if err != nil {
return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress Name from ID(%s)", serviceName, *lb.Name, *pipID) return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress Name from ID(%s)", serviceName, *lb.Name, *pipID)
} }

View File

@ -72,7 +72,7 @@ const (
) )
var errNotInVMSet = errors.New("vm is not in the vmset") var errNotInVMSet = errors.New("vm is not in the vmset")
var providerIDRE = regexp.MustCompile(`^` + CloudProviderName + `://(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`) var providerIDRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)
var backendPoolIDRE = regexp.MustCompile(`^/subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Network/loadBalancers/(.+)/backendAddressPools/(?:.*)`) var backendPoolIDRE = regexp.MustCompile(`^/subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Network/loadBalancers/(.+)/backendAddressPools/(?:.*)`)
var nicResourceGroupRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/Microsoft.Network/networkInterfaces/(?:.*)`) var nicResourceGroupRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/Microsoft.Network/networkInterfaces/(?:.*)`)
@ -171,8 +171,8 @@ func isMasterNode(node *v1.Node) bool {
} }
// returns the deepest child's identifier from a full identifier string. // returns the deepest child's identifier from a full identifier string.
func getLastSegment(ID string) (string, error) { func getLastSegment(ID, separator string) (string, error) {
parts := strings.Split(ID, "/") parts := strings.Split(ID, separator)
name := parts[len(parts)-1] name := parts[len(parts)-1]
if len(name) == 0 { if len(name) == 0 {
return "", fmt.Errorf("resource name was missing from identifier") return "", fmt.Errorf("resource name was missing from identifier")
@ -519,7 +519,7 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error)
publicIP := "" publicIP := ""
if ipConfig.PublicIPAddress != nil && ipConfig.PublicIPAddress.ID != nil { if ipConfig.PublicIPAddress != nil && ipConfig.PublicIPAddress.ID != nil {
pipID := *ipConfig.PublicIPAddress.ID pipID := *ipConfig.PublicIPAddress.ID
pipName, err := getLastSegment(pipID) pipName, err := getLastSegment(pipID, "/")
if err != nil { if err != nil {
return "", "", fmt.Errorf("failed to publicIP name for node %q with pipID %q", name, pipID) return "", "", fmt.Errorf("failed to publicIP name for node %q with pipID %q", name, pipID)
} }
@ -589,7 +589,7 @@ func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentP
// already added in the list // already added in the list
continue continue
} }
asName, err := getLastSegment(asID) asName, err := getLastSegment(asID, "/")
if err != nil { if err != nil {
klog.Errorf("as.getNodeAvailabilitySet - Node (%s)- getLastSegment(%s), err=%v", nodeName, asID, err) klog.Errorf("as.getNodeAvailabilitySet - Node (%s)- getLastSegment(%s), err=%v", nodeName, asID, err)
return nil, err return nil, err
@ -680,7 +680,7 @@ func (as *availabilitySet) getPrimaryInterfaceWithVMSet(nodeName, vmSetName stri
if err != nil { if err != nil {
return network.Interface{}, err return network.Interface{}, err
} }
nicName, err := getLastSegment(primaryNicID) nicName, err := getLastSegment(primaryNicID, "/")
if err != nil { if err != nil {
return network.Interface{}, err return network.Interface{}, err
} }

View File

@ -62,33 +62,44 @@ func TestIsMasterNode(t *testing.T) {
func TestGetLastSegment(t *testing.T) { func TestGetLastSegment(t *testing.T) {
tests := []struct { tests := []struct {
ID string ID string
separator string
expected string expected string
expectErr bool expectErr bool
}{ }{
{ {
ID: "", ID: "",
separator: "/",
expected: "", expected: "",
expectErr: true, expectErr: true,
}, },
{ {
ID: "foo/", ID: "foo/",
separator: "/",
expected: "", expected: "",
expectErr: true, expectErr: true,
}, },
{ {
ID: "foo/bar", ID: "foo/bar",
separator: "/",
expected: "bar", expected: "bar",
expectErr: false, expectErr: false,
}, },
{ {
ID: "foo/bar/baz", ID: "foo/bar/baz",
separator: "/",
expected: "baz", expected: "baz",
expectErr: false, expectErr: false,
}, },
{
ID: "k8s-agentpool-36841236-vmss_1",
separator: "_",
expected: "1",
expectErr: false,
},
} }
for _, test := range tests { for _, test := range tests {
s, e := getLastSegment(test.ID) s, e := getLastSegment(test.ID, test.separator)
if test.expectErr && e == nil { if test.expectErr && e == nil {
t.Errorf("Expected err, but it was nil") t.Errorf("Expected err, but it was nil")
continue continue

View File

@ -2149,10 +2149,15 @@ func TestGetNodeNameByProviderID(t *testing.T) {
name: "k8s-agent-AAAAAAAA-0", name: "k8s-agent-AAAAAAAA-0",
fail: false, fail: false,
}, },
{
providerID: "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-1",
name: "k8s-agent-AAAAAAAA-1",
fail: false,
},
{ {
providerID: CloudProviderName + ":/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0", providerID: CloudProviderName + ":/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
name: "", name: "k8s-agent-AAAAAAAA-0",
fail: true, fail: false,
}, },
{ {
providerID: CloudProviderName + "://", providerID: CloudProviderName + "://",
@ -2161,20 +2166,20 @@ func TestGetNodeNameByProviderID(t *testing.T) {
}, },
{ {
providerID: ":///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0", providerID: ":///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
name: "", name: "k8s-agent-AAAAAAAA-0",
fail: true, fail: false,
}, },
{ {
providerID: "aws:///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0", providerID: "aws:///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
name: "", name: "k8s-agent-AAAAAAAA-0",
fail: true, fail: false,
}, },
} }
for _, test := range providers { for _, test := range providers {
name, err := az.vmSet.GetNodeNameByProviderID(test.providerID) name, err := az.vmSet.GetNodeNameByProviderID(test.providerID)
if (err != nil) != test.fail { if (err != nil) != test.fail {
t.Errorf("Expected to failt=%t, with pattern %v", test.fail, test) t.Errorf("Expected to fail=%t, with pattern %v", test.fail, test)
} }
if test.fail { if test.fail {

View File

@ -285,6 +285,11 @@ func (ss *scaleSet) GetInstanceIDByNodeName(name string) (string, error) {
} }
// GetNodeNameByProviderID gets the node name by provider ID. // GetNodeNameByProviderID gets the node name by provider ID.
// providerID example:
// 1. vmas providerID: azure:///subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-27053986-0
// 2. vmss providerID:
// azure:///subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/aks-agentpool-22126781-vmss/virtualMachines/1
// /subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/aks-agentpool-22126781-vmss/virtualMachines/k8s-agentpool-36841236-vmss_1
func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName, error) { func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName, error) {
// NodeName is not part of providerID for vmss instances. // NodeName is not part of providerID for vmss instances.
scaleSetName, err := extractScaleSetNameByProviderID(providerID) scaleSetName, err := extractScaleSetNameByProviderID(providerID)
@ -298,12 +303,20 @@ func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName,
return "", fmt.Errorf("error of extracting resource group for node %q", providerID) return "", fmt.Errorf("error of extracting resource group for node %q", providerID)
} }
instanceID, err := getLastSegment(providerID) instanceID, err := getLastSegment(providerID, "/")
if err != nil { if err != nil {
klog.V(4).Infof("Can not extract instanceID from providerID (%s), assuming it is managed by availability set: %v", providerID, err) klog.V(4).Infof("Can not extract instanceID from providerID (%s), assuming it is managed by availability set: %v", providerID, err)
return ss.availabilitySet.GetNodeNameByProviderID(providerID) return ss.availabilitySet.GetNodeNameByProviderID(providerID)
} }
// instanceID contains scaleSetName (returned by disk.ManagedBy), e.g. k8s-agentpool-36841236-vmss_1
if strings.HasPrefix(strings.ToLower(instanceID), strings.ToLower(scaleSetName)) {
instanceID, err = getLastSegment(instanceID, "_")
if err != nil {
return "", err
}
}
vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe) vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
if err != nil { if err != nil {
return "", err return "", err
@ -695,7 +708,7 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
return network.Interface{}, err return network.Interface{}, err
} }
nicName, err := getLastSegment(primaryInterfaceID) nicName, err := getLastSegment(primaryInterfaceID, "/")
if err != nil { if err != nil {
klog.Errorf("error: ss.GetPrimaryInterface(%s), getLastSegment(%s), err=%v", nodeName, primaryInterfaceID, err) klog.Errorf("error: ss.GetPrimaryInterface(%s), getLastSegment(%s), err=%v", nodeName, primaryInterfaceID, err)
return network.Interface{}, err return network.Interface{}, err