Merge pull request #95748 from nilo19/bug/fix-lb-update-failed-pip

Update the PIP when it is not in the Succeeded provisioning state during the LB update.
This commit is contained in:
Kubernetes Prow Robot 2020-10-21 01:34:19 -07:00 committed by GitHub
commit 9a6e35a16a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 7 deletions

View File

@ -20,6 +20,7 @@ package azure
import (
"net/http"
"regexp"
"strings"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
@ -44,6 +45,12 @@ const (
operationCanceledErrorMessage = "canceledandsupersededduetoanotheroperation"
cannotDeletePublicIPErrorMessageCode = "PublicIPAddressCannotBeDeleted"
referencedResourceNotProvisionedMessageCode = "ReferencedResourceNotProvisioned"
)
var (
pipErrorMessageRE = regexp.MustCompile(`(?:.*)/subscriptions/(?:.*)/resourceGroups/(.*)/providers/Microsoft.Network/publicIPAddresses/([^\s]+)(?:.*)`)
)
// RequestBackoff if backoff is disabled in cloud provider it
@ -182,7 +189,7 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
defer cancel()
rgName := az.getLoadBalancerResourceGroup()
rerr := az.LoadBalancerClient.CreateOrUpdate(ctx, rgName, *lb.Name, lb, to.String(lb.Etag))
rerr := az.LoadBalancerClient.CreateOrUpdate(ctx, rgName, to.String(lb.Name), lb, to.String(lb.Etag))
klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
if rerr == nil {
// Invalidate the cache right after updating
@ -192,12 +199,39 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer)
// Invalidate the cache because ETAG precondition mismatch.
if rerr.HTTPStatusCode == http.StatusPreconditionFailed {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", *lb.Name)
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", to.String(lb.Name))
az.lbCache.Delete(*lb.Name)
}
retryErrorMessage := rerr.Error().Error()
// Invalidate the cache because another new operation has canceled the current request.
if strings.Contains(strings.ToLower(rerr.Error().Error()), operationCanceledErrorMessage) {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", *lb.Name)
if strings.Contains(strings.ToLower(retryErrorMessage), operationCanceledErrorMessage) {
klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", to.String(lb.Name))
az.lbCache.Delete(*lb.Name)
}
// The LB update may fail because the referenced PIP is not in the Succeeded provisioning state
if strings.Contains(strings.ToLower(retryErrorMessage), strings.ToLower(referencedResourceNotProvisionedMessageCode)) {
matches := pipErrorMessageRE.FindStringSubmatch(retryErrorMessage)
if len(matches) != 3 {
klog.Warningf("Failed to parse the retry error message %s", retryErrorMessage)
return rerr.Error()
}
pipRG, pipName := matches[1], matches[2]
klog.V(3).Infof("The public IP %s referenced by load balancer %s is not in Succeeded provisioning state, will try to update it", pipName, to.String(lb.Name))
pip, _, err := az.getPublicIPAddress(pipRG, pipName)
if err != nil {
klog.Warningf("Failed to get the public IP %s in resource group %s: %v", pipName, pipRG, err)
return rerr.Error()
}
// Perform a dummy update to fix the provisioning state
err = az.CreateOrUpdatePIP(service, pipRG, pip)
if err != nil {
klog.Warningf("Failed to update the public IP %s in resource group %s: %v", pipName, pipRG, err)
return rerr.Error()
}
// Invalidate the LB cache, return the error, and the controller manager
// would retry the LB update in the next reconcile loop
az.lbCache.Delete(*lb.Name)
}
@ -241,10 +275,10 @@ func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string,
ctx, cancel := getContextWithCancel()
defer cancel()
rerr := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, *pip.Name, pip)
klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, *pip.Name)
rerr := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, to.String(pip.Name), pip)
klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, to.String(pip.Name))
if rerr != nil {
klog.Errorf("PublicIPAddressesClient.CreateOrUpdate(%s, %s) failed: %s", pipResourceGroup, *pip.Name, rerr.Error().Error())
klog.Errorf("PublicIPAddressesClient.CreateOrUpdate(%s, %s) failed: %s", pipResourceGroup, to.String(pip.Name), rerr.Error().Error())
az.Event(service, v1.EventTypeWarning, "CreateOrUpdatePublicIPAddress", rerr.Error().Error())
return rerr.Error()
}

View File

@ -240,6 +240,8 @@ func TestCreateOrUpdateLB(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
referencedResourceNotProvisionedRawErrorString := `Code="ReferencedResourceNotProvisioned" Message="Cannot proceed with operation because resource /subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/publicIPAddresses/pip used by resource /subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/loadBalancers/lb is not in Succeeded state. Resource is in Failed state and the last operation that updated/is updating the resource is PutPublicIpAddressOperation."`
tests := []struct {
clientErr *retry.Error
expectedErr error
@ -252,6 +254,10 @@ func TestCreateOrUpdateLB(t *testing.T) {
clientErr: &retry.Error{RawError: fmt.Errorf(operationCanceledErrorMessage)},
expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: canceledandsupersededduetoanotheroperation"),
},
{
clientErr: &retry.Error{RawError: fmt.Errorf(referencedResourceNotProvisionedRawErrorString)},
expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %s", referencedResourceNotProvisionedRawErrorString),
},
}
for _, test := range tests {
@ -262,6 +268,15 @@ func TestCreateOrUpdateLB(t *testing.T) {
mockLBClient.EXPECT().CreateOrUpdate(gomock.Any(), az.ResourceGroup, gomock.Any(), gomock.Any(), gomock.Any()).Return(test.clientErr)
mockLBClient.EXPECT().Get(gomock.Any(), az.ResourceGroup, "lb", gomock.Any()).Return(network.LoadBalancer{}, nil)
mockPIPClient := az.PublicIPAddressesClient.(*mockpublicipclient.MockInterface)
mockPIPClient.EXPECT().CreateOrUpdate(gomock.Any(), az.ResourceGroup, "pip", gomock.Any()).Return(nil).AnyTimes()
mockPIPClient.EXPECT().Get(gomock.Any(), az.ResourceGroup, "pip", gomock.Any()).Return(network.PublicIPAddress{
Name: to.StringPtr("pip"),
PublicIPAddressPropertiesFormat: &network.PublicIPAddressPropertiesFormat{
ProvisioningState: to.StringPtr("Succeeded"),
},
}, nil).AnyTimes()
err := az.CreateOrUpdateLB(&v1.Service{}, network.LoadBalancer{
Name: to.StringPtr("lb"),
Etag: to.StringPtr("etag"),