Merge pull request #90749 from andyzhangx/dangling-error

fix: azure disk dangling attach issue on VMSS which would cause API throttling
This commit is contained in:
Kubernetes Prow Robot 2020-05-05 23:43:09 -07:00 committed by GitHub
commit 4d813c1360
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 56 additions and 24 deletions

View File

@ -121,6 +121,11 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
diskEncryptionSetID := ""
writeAcceleratorEnabled := false
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
return -1, err
}
if isManagedDisk {
diskName := path.Base(diskURI)
resourceGroup, err := getResourceGroupFromDiskURI(diskURI)
@ -140,9 +145,12 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
attachErr := fmt.Sprintf(
"disk(%s) already attached to node(%s), could not be attached to node(%s)",
diskURI, *disk.ManagedBy, nodeName)
attachedNode := path.Base(*disk.ManagedBy)
attachedNode, err := vmset.GetNodeNameByProviderID(*disk.ManagedBy)
if err != nil {
return -1, err
}
klog.V(2).Infof("found dangling volume %s attached to node %s", diskURI, attachedNode)
danglingErr := volerr.NewDanglingError(attachErr, types.NodeName(attachedNode), "")
danglingErr := volerr.NewDanglingError(attachErr, attachedNode, "")
return -1, danglingErr
}
@ -157,11 +165,6 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
}
}
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
return -1, err
}
instanceid, err := c.cloud.InstanceID(context.TODO(), nodeName)
if err != nil {
klog.Warningf("failed to get azure instance id (%v) for node %s", err, nodeName)

View File

@ -405,7 +405,7 @@ func (az *Cloud) getServiceLoadBalancerStatus(service *v1.Service, lb *network.L
if pipID == nil {
return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress ID is Nil", serviceName, *lb.Name)
}
pipName, err := getLastSegment(*pipID)
pipName, err := getLastSegment(*pipID, "/")
if err != nil {
return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress Name from ID(%s)", serviceName, *lb.Name, *pipID)
}

View File

@ -72,7 +72,7 @@ const (
)
var errNotInVMSet = errors.New("vm is not in the vmset")
var providerIDRE = regexp.MustCompile(`^` + CloudProviderName + `://(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)
var providerIDRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)
var backendPoolIDRE = regexp.MustCompile(`^/subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Network/loadBalancers/(.+)/backendAddressPools/(?:.*)`)
var nicResourceGroupRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/Microsoft.Network/networkInterfaces/(?:.*)`)
@ -171,8 +171,8 @@ func isMasterNode(node *v1.Node) bool {
}
// returns the deepest child's identifier from a full identifier string.
func getLastSegment(ID string) (string, error) {
parts := strings.Split(ID, "/")
func getLastSegment(ID, separator string) (string, error) {
parts := strings.Split(ID, separator)
name := parts[len(parts)-1]
if len(name) == 0 {
return "", fmt.Errorf("resource name was missing from identifier")
@ -519,7 +519,7 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error)
publicIP := ""
if ipConfig.PublicIPAddress != nil && ipConfig.PublicIPAddress.ID != nil {
pipID := *ipConfig.PublicIPAddress.ID
pipName, err := getLastSegment(pipID)
pipName, err := getLastSegment(pipID, "/")
if err != nil {
return "", "", fmt.Errorf("failed to publicIP name for node %q with pipID %q", name, pipID)
}
@ -589,7 +589,7 @@ func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentP
// already added in the list
continue
}
asName, err := getLastSegment(asID)
asName, err := getLastSegment(asID, "/")
if err != nil {
klog.Errorf("as.getNodeAvailabilitySet - Node (%s)- getLastSegment(%s), err=%v", nodeName, asID, err)
return nil, err
@ -680,7 +680,7 @@ func (as *availabilitySet) getPrimaryInterfaceWithVMSet(nodeName, vmSetName stri
if err != nil {
return network.Interface{}, err
}
nicName, err := getLastSegment(primaryNicID)
nicName, err := getLastSegment(primaryNicID, "/")
if err != nil {
return network.Interface{}, err
}

View File

@ -62,33 +62,44 @@ func TestIsMasterNode(t *testing.T) {
func TestGetLastSegment(t *testing.T) {
tests := []struct {
ID string
separator string
expected string
expectErr bool
}{
{
ID: "",
separator: "/",
expected: "",
expectErr: true,
},
{
ID: "foo/",
separator: "/",
expected: "",
expectErr: true,
},
{
ID: "foo/bar",
separator: "/",
expected: "bar",
expectErr: false,
},
{
ID: "foo/bar/baz",
separator: "/",
expected: "baz",
expectErr: false,
},
{
ID: "k8s-agentpool-36841236-vmss_1",
separator: "_",
expected: "1",
expectErr: false,
},
}
for _, test := range tests {
s, e := getLastSegment(test.ID)
s, e := getLastSegment(test.ID, test.separator)
if test.expectErr && e == nil {
t.Errorf("Expected err, but it was nil")
continue

View File

@ -2149,10 +2149,15 @@ func TestGetNodeNameByProviderID(t *testing.T) {
name: "k8s-agent-AAAAAAAA-0",
fail: false,
},
{
providerID: "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-1",
name: "k8s-agent-AAAAAAAA-1",
fail: false,
},
{
providerID: CloudProviderName + ":/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
name: "",
fail: true,
name: "k8s-agent-AAAAAAAA-0",
fail: false,
},
{
providerID: CloudProviderName + "://",
@ -2161,20 +2166,20 @@ func TestGetNodeNameByProviderID(t *testing.T) {
},
{
providerID: ":///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
name: "",
fail: true,
name: "k8s-agent-AAAAAAAA-0",
fail: false,
},
{
providerID: "aws:///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
name: "",
fail: true,
name: "k8s-agent-AAAAAAAA-0",
fail: false,
},
}
for _, test := range providers {
name, err := az.vmSet.GetNodeNameByProviderID(test.providerID)
if (err != nil) != test.fail {
t.Errorf("Expected to failt=%t, with pattern %v", test.fail, test)
t.Errorf("Expected to fail=%t, with pattern %v", test.fail, test)
}
if test.fail {

View File

@ -285,6 +285,11 @@ func (ss *scaleSet) GetInstanceIDByNodeName(name string) (string, error) {
}
// GetNodeNameByProviderID gets the node name by provider ID.
// providerID example:
// 1. vmas providerID: azure:///subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-27053986-0
// 2. vmss providerID:
// azure:///subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/aks-agentpool-22126781-vmss/virtualMachines/1
// /subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/aks-agentpool-22126781-vmss/virtualMachines/k8s-agentpool-36841236-vmss_1
func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName, error) {
// NodeName is not part of providerID for vmss instances.
scaleSetName, err := extractScaleSetNameByProviderID(providerID)
@ -298,12 +303,20 @@ func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName,
return "", fmt.Errorf("error of extracting resource group for node %q", providerID)
}
instanceID, err := getLastSegment(providerID)
instanceID, err := getLastSegment(providerID, "/")
if err != nil {
klog.V(4).Infof("Can not extract instanceID from providerID (%s), assuming it is managed by availability set: %v", providerID, err)
return ss.availabilitySet.GetNodeNameByProviderID(providerID)
}
// instanceID contains scaleSetName (returned by disk.ManagedBy), e.g. k8s-agentpool-36841236-vmss_1
if strings.HasPrefix(strings.ToLower(instanceID), strings.ToLower(scaleSetName)) {
instanceID, err = getLastSegment(instanceID, "_")
if err != nil {
return "", err
}
}
vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
if err != nil {
return "", err
@ -695,7 +708,7 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
return network.Interface{}, err
}
nicName, err := getLastSegment(primaryInterfaceID)
nicName, err := getLastSegment(primaryInterfaceID, "/")
if err != nil {
klog.Errorf("error: ss.GetPrimaryInterface(%s), getLastSegment(%s), err=%v", nodeName, primaryInterfaceID, err)
return network.Interface{}, err