From edfb2ad55293841abd3cbabe45fa162499831a7a Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Tue, 14 Nov 2017 17:39:55 -0800 Subject: [PATCH 01/18] Azure load balancer general improvement --- pkg/cloudprovider/providers/azure/BUILD | 1 + pkg/cloudprovider/providers/azure/azure.go | 142 +++- .../providers/azure/azure_backoff.go | 205 ++++- .../providers/azure/azure_fakes.go | 584 +++++++++++++ .../providers/azure/azure_instances.go | 33 - .../providers/azure/azure_loadbalancer.go | 793 +++++++++--------- .../providers/azure/azure_loadbalancer.md | 68 ++ .../providers/azure/azure_test.go | 785 ++++++++++++++--- .../providers/azure/azure_util.go | 212 ++++- .../providers/azure/azure_wrap.go | 33 +- 10 files changed, 2240 insertions(+), 616 deletions(-) create mode 100644 pkg/cloudprovider/providers/azure/azure_fakes.go create mode 100644 pkg/cloudprovider/providers/azure/azure_loadbalancer.md diff --git a/pkg/cloudprovider/providers/azure/BUILD b/pkg/cloudprovider/providers/azure/BUILD index 8aa8da13c15..c1d7bb6d735 100644 --- a/pkg/cloudprovider/providers/azure/BUILD +++ b/pkg/cloudprovider/providers/azure/BUILD @@ -13,6 +13,7 @@ go_library( "azure_backoff.go", "azure_blobDiskController.go", "azure_controllerCommon.go", + "azure_fakes.go", "azure_file.go", "azure_instance_metadata.go", "azure_instances.go", diff --git a/pkg/cloudprovider/providers/azure/azure.go b/pkg/cloudprovider/providers/azure/azure.go index ee9ebf352fa..a7cff34e774 100644 --- a/pkg/cloudprovider/providers/azure/azure.go +++ b/pkg/cloudprovider/providers/azure/azure.go @@ -44,13 +44,14 @@ import ( const ( // CloudProviderName is the value used for the --cloud-provider flag - CloudProviderName = "azure" - rateLimitQPSDefault = 1.0 - rateLimitBucketDefault = 5 - backoffRetriesDefault = 6 - backoffExponentDefault = 1.5 - backoffDurationDefault = 5 // in seconds - backoffJitterDefault = 1.0 + CloudProviderName = "azure" + rateLimitQPSDefault = 1.0 + rateLimitBucketDefault = 5 + 
backoffRetriesDefault = 6 + backoffExponentDefault = 1.5 + backoffDurationDefault = 5 // in seconds + backoffJitterDefault = 1.0 + maximumLoadBalancerRuleCount = 148 // According to Azure LB rule default limit ) // Config holds the configuration parsed from the --cloud-config flag @@ -113,6 +114,51 @@ type Config struct { // Use managed service identity for the virtual machine to access Azure ARM APIs UseManagedIdentityExtension bool `json:"useManagedIdentityExtension"` + + // Maximum allowed LoadBalancer Rule Count is the limit enforced by Azure Load balancer + MaximumLoadBalancerRuleCount int `json:"maximumLoadBalancerRuleCount"` +} + +type iVirtualMachinesClient interface { + CreateOrUpdate(resourceGroupName string, VMName string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error) + Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error) + List(resourceGroupName string) (result compute.VirtualMachineListResult, err error) + ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error) +} + +type iInterfacesClient interface { + CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error) + Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error) +} + +type iLoadBalancersClient interface { + CreateOrUpdate(resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer, cancel <-chan struct{}) (<-chan network.LoadBalancer, <-chan error) + Delete(resourceGroupName string, loadBalancerName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) + Get(resourceGroupName string, loadBalancerName string, expand string) (result network.LoadBalancer, err error) + List(resourceGroupName string) (result 
network.LoadBalancerListResult, err error) + ListNextResults(lastResult network.LoadBalancerListResult) (result network.LoadBalancerListResult, err error) +} + +type iPublicIPAddressesClient interface { + CreateOrUpdate(resourceGroupName string, publicIPAddressName string, parameters network.PublicIPAddress, cancel <-chan struct{}) (<-chan network.PublicIPAddress, <-chan error) + Delete(resourceGroupName string, publicIPAddressName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) + Get(resourceGroupName string, publicIPAddressName string, expand string) (result network.PublicIPAddress, err error) + List(resourceGroupName string) (result network.PublicIPAddressListResult, err error) + ListNextResults(lastResults network.PublicIPAddressListResult) (result network.PublicIPAddressListResult, err error) +} + +type iSubnetsClient interface { + CreateOrUpdate(resourceGroupName string, virtualNetworkName string, subnetName string, subnetParameters network.Subnet, cancel <-chan struct{}) (<-chan network.Subnet, <-chan error) + Delete(resourceGroupName string, virtualNetworkName string, subnetName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) + Get(resourceGroupName string, virtualNetworkName string, subnetName string, expand string) (result network.Subnet, err error) + List(resourceGroupName string, virtualNetworkName string) (result network.SubnetListResult, err error) +} + +type iSecurityGroupsClient interface { + CreateOrUpdate(resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, cancel <-chan struct{}) (<-chan network.SecurityGroup, <-chan error) + Delete(resourceGroupName string, networkSecurityGroupName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) + Get(resourceGroupName string, networkSecurityGroupName string, expand string) (result network.SecurityGroup, err error) + List(resourceGroupName string) (result network.SecurityGroupListResult, err error) 
} // Cloud holds the config and clients @@ -120,13 +166,13 @@ type Cloud struct { Config Environment azure.Environment RoutesClient network.RoutesClient - SubnetsClient network.SubnetsClient - InterfacesClient network.InterfacesClient + SubnetsClient iSubnetsClient + InterfacesClient iInterfacesClient RouteTablesClient network.RouteTablesClient - LoadBalancerClient network.LoadBalancersClient - PublicIPAddressesClient network.PublicIPAddressesClient - SecurityGroupsClient network.SecurityGroupsClient - VirtualMachinesClient compute.VirtualMachinesClient + LoadBalancerClient iLoadBalancersClient + PublicIPAddressesClient iPublicIPAddressesClient + SecurityGroupsClient iSecurityGroupsClient + VirtualMachinesClient iVirtualMachinesClient StorageAccountClient storage.AccountsClient DisksClient disk.DisksClient operationPollRateLimiter flowcontrol.RateLimiter @@ -221,11 +267,12 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) { return nil, err } - az.SubnetsClient = network.NewSubnetsClient(az.SubscriptionID) - az.SubnetsClient.BaseURI = az.Environment.ResourceManagerEndpoint - az.SubnetsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) - az.SubnetsClient.PollingDelay = 5 * time.Second - configureUserAgent(&az.SubnetsClient.Client) + subnetsClient := network.NewSubnetsClient(az.SubscriptionID) + subnetsClient.BaseURI = az.Environment.ResourceManagerEndpoint + subnetsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) + subnetsClient.PollingDelay = 5 * time.Second + configureUserAgent(&subnetsClient.Client) + az.SubnetsClient = subnetsClient az.RouteTablesClient = network.NewRouteTablesClient(az.SubscriptionID) az.RouteTablesClient.BaseURI = az.Environment.ResourceManagerEndpoint @@ -239,35 +286,40 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) { az.RoutesClient.PollingDelay = 5 * time.Second configureUserAgent(&az.RoutesClient.Client) - az.InterfacesClient = 
network.NewInterfacesClient(az.SubscriptionID) - az.InterfacesClient.BaseURI = az.Environment.ResourceManagerEndpoint - az.InterfacesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) - az.InterfacesClient.PollingDelay = 5 * time.Second - configureUserAgent(&az.InterfacesClient.Client) + interfacesClient := network.NewInterfacesClient(az.SubscriptionID) + interfacesClient.BaseURI = az.Environment.ResourceManagerEndpoint + interfacesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) + interfacesClient.PollingDelay = 5 * time.Second + configureUserAgent(&interfacesClient.Client) + az.InterfacesClient = interfacesClient - az.LoadBalancerClient = network.NewLoadBalancersClient(az.SubscriptionID) - az.LoadBalancerClient.BaseURI = az.Environment.ResourceManagerEndpoint - az.LoadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) - az.LoadBalancerClient.PollingDelay = 5 * time.Second - configureUserAgent(&az.LoadBalancerClient.Client) + loadBalancerClient := network.NewLoadBalancersClient(az.SubscriptionID) + loadBalancerClient.BaseURI = az.Environment.ResourceManagerEndpoint + loadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) + loadBalancerClient.PollingDelay = 5 * time.Second + configureUserAgent(&loadBalancerClient.Client) + az.LoadBalancerClient = loadBalancerClient - az.VirtualMachinesClient = compute.NewVirtualMachinesClient(az.SubscriptionID) - az.VirtualMachinesClient.BaseURI = az.Environment.ResourceManagerEndpoint - az.VirtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) - az.VirtualMachinesClient.PollingDelay = 5 * time.Second - configureUserAgent(&az.VirtualMachinesClient.Client) + virtualMachinesClient := compute.NewVirtualMachinesClient(az.SubscriptionID) + virtualMachinesClient.BaseURI = az.Environment.ResourceManagerEndpoint + virtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) + 
virtualMachinesClient.PollingDelay = 5 * time.Second + configureUserAgent(&virtualMachinesClient.Client) + az.VirtualMachinesClient = virtualMachinesClient - az.PublicIPAddressesClient = network.NewPublicIPAddressesClient(az.SubscriptionID) - az.PublicIPAddressesClient.BaseURI = az.Environment.ResourceManagerEndpoint - az.PublicIPAddressesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) - az.PublicIPAddressesClient.PollingDelay = 5 * time.Second - configureUserAgent(&az.PublicIPAddressesClient.Client) + publicIPAddressClient := network.NewPublicIPAddressesClient(az.SubscriptionID) + publicIPAddressClient.BaseURI = az.Environment.ResourceManagerEndpoint + publicIPAddressClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) + publicIPAddressClient.PollingDelay = 5 * time.Second + configureUserAgent(&publicIPAddressClient.Client) + az.PublicIPAddressesClient = publicIPAddressClient - az.SecurityGroupsClient = network.NewSecurityGroupsClient(az.SubscriptionID) - az.SecurityGroupsClient.BaseURI = az.Environment.ResourceManagerEndpoint - az.SecurityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) - az.SecurityGroupsClient.PollingDelay = 5 * time.Second - configureUserAgent(&az.SecurityGroupsClient.Client) + securityGroupsClient := network.NewSecurityGroupsClient(az.SubscriptionID) + securityGroupsClient.BaseURI = az.Environment.ResourceManagerEndpoint + securityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) + securityGroupsClient.PollingDelay = 5 * time.Second + configureUserAgent(&securityGroupsClient.Client) + az.SecurityGroupsClient = securityGroupsClient az.StorageAccountClient = storage.NewAccountsClientWithBaseURI(az.Environment.ResourceManagerEndpoint, az.SubscriptionID) az.StorageAccountClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken) @@ -327,6 +379,10 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) { az.metadata = 
NewInstanceMetadata() + if az.MaximumLoadBalancerRuleCount == 0 { + az.MaximumLoadBalancerRuleCount = maximumLoadBalancerRuleCount + } + if err := initDiskControllers(&az); err != nil { return nil, err } diff --git a/pkg/cloudprovider/providers/azure/azure_backoff.go b/pkg/cloudprovider/providers/azure/azure_backoff.go index b30b1da38b7..32f3a5c0517 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff.go @@ -26,11 +26,25 @@ import ( "k8s.io/apimachinery/pkg/types" ) +// getorCreateRequestBackoff returns a new Backoff object steps = 1 +// This is to make sure that the requested command executes +// at least once +func (az *Cloud) getorCreateRequestBackoff() (resourceRequestBackoff wait.Backoff) { + if az.CloudProviderBackoff { + return az.resourceRequestBackoff + } + resourceRequestBackoff = wait.Backoff{ + Steps: 1, + } + + return resourceRequestBackoff +} + // GetVirtualMachineWithRetry invokes az.getVirtualMachine with exponential backoff retry func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.VirtualMachine, bool, error) { var machine compute.VirtualMachine var exists bool - err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { var retryErr error machine, exists, retryErr = az.getVirtualMachine(name) if retryErr != nil { @@ -46,8 +60,9 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua // VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) { var machine compute.VirtualMachine - err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, 
error) { var retryErr error + az.operationPollRateLimiter.Accept() machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types) if retryErr != nil { glog.Errorf("backoff: failure, will retry,err=%v", retryErr) @@ -59,10 +74,63 @@ func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, return machine, err } +// VirtualMachineClientListWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry +func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, error) { + allNodes := []compute.VirtualMachine{} + var result compute.VirtualMachineListResult + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + var retryErr error + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("VirtualMachinesClient.List(%v): start", az.ResourceGroup) + result, retryErr = az.VirtualMachinesClient.List(az.ResourceGroup) + glog.V(10).Infof("VirtualMachinesClient.List(%v): end", az.ResourceGroup) + if retryErr != nil { + glog.Errorf("VirtualMachinesClient.List(%v) - backoff: failure, will retry,err=%v", + az.ResourceGroup, + retryErr) + return false, retryErr + } + glog.V(2).Infof("VirtualMachinesClient.List(%v) - backoff: success", az.ResourceGroup) + return true, nil + }) + if err != nil { + return nil, err + } + + appendResults := (result.Value != nil && len(*result.Value) > 0) + for appendResults { + allNodes = append(allNodes, *result.Value...) 
+ appendResults = false + // follow the next link to get all the vms for resource group + if result.NextLink != nil { + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + var retryErr error + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("VirtualMachinesClient.ListNextResults(%v): start", az.ResourceGroup) + result, retryErr = az.VirtualMachinesClient.ListNextResults(result) + glog.V(10).Infof("VirtualMachinesClient.ListNextResults(%v): end", az.ResourceGroup) + if retryErr != nil { + glog.Errorf("VirtualMachinesClient.ListNextResults(%v) - backoff: failure, will retry,err=%v", + az.ResourceGroup, retryErr) + return false, retryErr + } + glog.V(2).Infof("VirtualMachinesClient.ListNextResults(%v): success", az.ResourceGroup) + return true, nil + }) + if err != nil { + return allNodes, err + } + appendResults = (result.Value != nil && len(*result.Value) > 0) + } + } + + return allNodes, err +} + // GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { var ip string - err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { var retryErr error ip, retryErr = az.getIPForMachine(name) if retryErr != nil { @@ -77,7 +145,7 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): start", *sg.Name) respChan, errChan := 
az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil) @@ -90,7 +158,7 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { // CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): start", *lb.Name) respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil) @@ -101,9 +169,120 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error { }) } +// ListLBWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry +func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { + allLBs := []network.LoadBalancer{} + var result network.LoadBalancerListResult + + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + var retryErr error + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("LoadBalancerClient.List(%v): start", az.ResourceGroup) + result, retryErr = az.LoadBalancerClient.List(az.ResourceGroup) + glog.V(10).Infof("LoadBalancerClient.List(%v): end", az.ResourceGroup) + if retryErr != nil { + glog.Errorf("LoadBalancerClient.List(%v) - backoff: failure, will retry,err=%v", + az.ResourceGroup, + retryErr) + return false, retryErr + } + glog.V(2).Infof("LoadBalancerClient.List(%v) - backoff: success", az.ResourceGroup) + return true, nil + }) + if err != nil { + return nil, err + } + + appendResults := (result.Value != nil && len(*result.Value) > 0) + for appendResults { + allLBs = append(allLBs, *result.Value...) 
+ appendResults = false + + // follow the next link to get all the vms for resource group + if result.NextLink != nil { + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + var retryErr error + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("LoadBalancerClient.ListNextResults(%v): start", az.ResourceGroup) + result, retryErr = az.LoadBalancerClient.ListNextResults(result) + glog.V(10).Infof("LoadBalancerClient.ListNextResults(%v): end", az.ResourceGroup) + if retryErr != nil { + glog.Errorf("LoadBalancerClient.ListNextResults(%v) - backoff: failure, will retry,err=%v", + az.ResourceGroup, + retryErr) + return false, retryErr + } + glog.V(2).Infof("LoadBalancerClient.ListNextResults(%v) - backoff: success", az.ResourceGroup) + return true, nil + }) + if err != nil { + return allLBs, err + } + appendResults = (result.Value != nil && len(*result.Value) > 0) + } + } + + return allLBs, nil +} + +// ListPIPWithRetry list the PIP resources in az.ResourceGroup +func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { + allPIPs := []network.PublicIPAddress{} + var result network.PublicIPAddressListResult + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + var retryErr error + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("PublicIPAddressesClient.List(%v): start", az.ResourceGroup) + result, retryErr = az.PublicIPAddressesClient.List(az.ResourceGroup) + glog.V(10).Infof("PublicIPAddressesClient.List(%v): end", az.ResourceGroup) + if retryErr != nil { + glog.Errorf("PublicIPAddressesClient.List(%v) - backoff: failure, will retry,err=%v", + az.ResourceGroup, + retryErr) + return false, retryErr + } + glog.V(2).Infof("PublicIPAddressesClient.List(%v) - backoff: success", az.ResourceGroup) + return true, nil + }) + if err != nil { + return nil, err + } + + appendResults := (result.Value != nil && len(*result.Value) > 0) + for appendResults { + allPIPs = append(allPIPs, 
*result.Value...) + appendResults = false + + // follow the next link to get all the vms for resource group + if result.NextLink != nil { + err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + var retryErr error + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("PublicIPAddressesClient.ListNextResults(%v): start", az.ResourceGroup) + result, retryErr = az.PublicIPAddressesClient.ListNextResults(result) + glog.V(10).Infof("PublicIPAddressesClient.ListNextResults(%v): end", az.ResourceGroup) + if retryErr != nil { + glog.Errorf("PublicIPAddressesClient.ListNextResults(%v) - backoff: failure, will retry,err=%v", + az.ResourceGroup, + retryErr) + return false, retryErr + } + glog.V(2).Infof("PublicIPAddressesClient.ListNextResults(%v) - backoff: success", az.ResourceGroup) + return true, nil + }) + if err != nil { + return allPIPs, err + } + appendResults = (result.Value != nil && len(*result.Value) > 0) + } + } + + return allPIPs, nil +} + // CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s): start", *pip.Name) respChan, errChan := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil) @@ -116,7 +295,7 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error { // CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return 
wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): start", *nic.Name) respChan, errChan := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil) @@ -129,7 +308,7 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error { // DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry func (az *Cloud) DeletePublicIPWithRetry(pipName string) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.Delete(%s): start", pipName) respChan, errChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) @@ -142,7 +321,7 @@ func (az *Cloud) DeletePublicIPWithRetry(pipName string) error { // DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry func (az *Cloud) DeleteLBWithRetry(lbName string) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.Delete(%s): start", lbName) respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil) @@ -155,7 +334,7 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error { // CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() 
glog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%s): start", *routeTable.Name) respChan, errChan := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil) @@ -168,7 +347,7 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable // CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): start", *route.Name) respChan, errChan := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil) @@ -181,7 +360,7 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { // DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry func (az *Cloud) DeleteRouteWithRetry(routeName string) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RoutesClient.Delete(%s): start", az.RouteTableName) respChan, errChan := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil) @@ -194,7 +373,7 @@ func (az *Cloud) DeleteRouteWithRetry(routeName string) error { // CreateOrUpdateVMWithRetry invokes az.VirtualMachinesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualMachine) error { - return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() 
glog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s): start", vmName) respChan, errChan := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go new file mode 100644 index 00000000000..0351f4efaa6 --- /dev/null +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -0,0 +1,584 @@ +package azure + +import ( + "fmt" + "math/rand" + "net/http" + "strings" + "sync" + "time" + + "github.com/Azure/go-autorest/autorest/to" + + "github.com/Azure/azure-sdk-for-go/arm/compute" + "github.com/Azure/azure-sdk-for-go/arm/network" + "github.com/Azure/go-autorest/autorest" +) + +type fakeAzureLBClient struct { + mutex *sync.Mutex + FakeStore map[string]map[string]network.LoadBalancer +} + +func NewFakeAzureLBClient() fakeAzureLBClient { + fLBC := fakeAzureLBClient{} + fLBC.FakeStore = make(map[string]map[string]network.LoadBalancer) + fLBC.mutex = &sync.Mutex{} + return fLBC +} + +func (fLBC fakeAzureLBClient) CreateOrUpdate(resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer, cancel <-chan struct{}) (<-chan network.LoadBalancer, <-chan error) { + fLBC.mutex.Lock() + defer fLBC.mutex.Unlock() + resultChan := make(chan network.LoadBalancer, 1) + errChan := make(chan error, 1) + var result network.LoadBalancer + var err error + defer func() { + resultChan <- result + errChan <- err + close(resultChan) + close(errChan) + }() + if _, ok := fLBC.FakeStore[resourceGroupName]; !ok { + fLBC.FakeStore[resourceGroupName] = make(map[string]network.LoadBalancer) + } + + // For dynamic ip allocation, just fill in the PrivateIPAddress + if parameters.FrontendIPConfigurations != nil { + for idx, config := range *parameters.FrontendIPConfigurations { + if config.PrivateIPAllocationMethod == network.Dynamic { + (*parameters.FrontendIPConfigurations)[idx].PrivateIPAddress = to.StringPtr("10.0.0.19") + } + } + } + 
fLBC.FakeStore[resourceGroupName][loadBalancerName] = parameters + result = fLBC.FakeStore[resourceGroupName][loadBalancerName] + err = nil + return resultChan, errChan +} + +func (fLBC fakeAzureLBClient) Delete(resourceGroupName string, loadBalancerName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) { + fLBC.mutex.Lock() + defer fLBC.mutex.Unlock() + respChan := make(chan autorest.Response, 1) + errChan := make(chan error, 1) + var resp autorest.Response + var err error + defer func() { + respChan <- resp + errChan <- err + close(respChan) + close(errChan) + }() + if _, ok := fLBC.FakeStore[resourceGroupName]; ok { + if _, ok := fLBC.FakeStore[resourceGroupName][loadBalancerName]; ok { + delete(fLBC.FakeStore[resourceGroupName], loadBalancerName) + resp.Response = &http.Response{ + StatusCode: http.StatusAccepted, + } + err = nil + return respChan, errChan + } + } + resp.Response = &http.Response{ + StatusCode: http.StatusNotFound, + } + err = autorest.DetailedError{ + StatusCode: http.StatusNotFound, + Message: "Not such LB", + } + return respChan, errChan +} + +func (fLBC fakeAzureLBClient) Get(resourceGroupName string, loadBalancerName string, expand string) (result network.LoadBalancer, err error) { + fLBC.mutex.Lock() + defer fLBC.mutex.Unlock() + if _, ok := fLBC.FakeStore[resourceGroupName]; ok { + if entity, ok := fLBC.FakeStore[resourceGroupName][loadBalancerName]; ok { + return entity, nil + } + } + return result, autorest.DetailedError{ + StatusCode: http.StatusNotFound, + Message: "Not such LB", + } +} + +func (fLBC fakeAzureLBClient) List(resourceGroupName string) (result network.LoadBalancerListResult, err error) { + fLBC.mutex.Lock() + defer fLBC.mutex.Unlock() + var value []network.LoadBalancer + if _, ok := fLBC.FakeStore[resourceGroupName]; ok { + for _, v := range fLBC.FakeStore[resourceGroupName] { + value = append(value, v) + } + } + result.Response.Response = &http.Response{ + StatusCode: http.StatusOK, + } + 
// ListNextResults fakes load-balancer result paging. The in-memory store
// never pages, so this always returns an empty OK result.
func (fLBC fakeAzureLBClient) ListNextResults(lastResult network.LoadBalancerListResult) (result network.LoadBalancerListResult, err error) {
	fLBC.mutex.Lock()
	defer fLBC.mutex.Unlock()
	result.Response.Response = &http.Response{
		StatusCode: http.StatusOK,
	}
	result.NextLink = nil
	result.Value = nil
	return result, nil
}

// fakeAzurePIPClient is an in-memory fake of the Azure PublicIPAddresses
// client. FakeStore maps resource group name -> public IP name -> resource.
// All methods take the shared mutex, so the fake is safe for concurrent use.
type fakeAzurePIPClient struct {
	mutex          *sync.Mutex
	FakeStore      map[string]map[string]network.PublicIPAddress
	SubscriptionID string
}

const publicIPAddressIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/publicIPAddresses/%s"

// returns the full identifier of a publicIPAddress.
func getpublicIPAddressID(subscriptionID string, resourceGroupName, pipName string) string {
	return fmt.Sprintf(
		publicIPAddressIDTemplate,
		subscriptionID,
		resourceGroupName,
		pipName)
}

// NewFakeAzurePIPClient returns an empty fake PIP client scoped to the given
// subscription ID (used when synthesizing resource IDs).
func NewFakeAzurePIPClient(subscriptionID string) fakeAzurePIPClient {
	fAPC := fakeAzurePIPClient{}
	fAPC.FakeStore = make(map[string]map[string]network.PublicIPAddress)
	fAPC.SubscriptionID = subscriptionID
	fAPC.mutex = &sync.Mutex{}
	return fAPC
}

// CreateOrUpdate stores the public IP in the fake store, synthesizing its
// Azure resource ID. Results are delivered on buffered channels (filled by
// the deferred func) to mimic the async SDK signature without blocking.
func (fAPC fakeAzurePIPClient) CreateOrUpdate(resourceGroupName string, publicIPAddressName string, parameters network.PublicIPAddress, cancel <-chan struct{}) (<-chan network.PublicIPAddress, <-chan error) {
	fAPC.mutex.Lock()
	defer fAPC.mutex.Unlock()
	resultChan := make(chan network.PublicIPAddress, 1)
	errChan := make(chan error, 1)
	var result network.PublicIPAddress
	var err error
	defer func() {
		resultChan <- result
		errChan <- err
		close(resultChan)
		close(errChan)
	}()
	if _, ok := fAPC.FakeStore[resourceGroupName]; !ok {
		fAPC.FakeStore[resourceGroupName] = make(map[string]network.PublicIPAddress)
	}

	// assign id
	pipID := getpublicIPAddressID(fAPC.SubscriptionID, resourceGroupName, publicIPAddressName)
	parameters.ID = &pipID

	// For Static allocation, fake the platform assigning an address.
	// NOTE(review): a user-supplied address would be overwritten here — the
	// fake assumes callers requesting Static did not set IPAddress themselves.
	if parameters.PublicIPAddressPropertiesFormat != nil &&
		parameters.PublicIPAddressPropertiesFormat.PublicIPAllocationMethod == network.Static {
		// assign a random ip
		rand.Seed(time.Now().UnixNano())
		randomIP := fmt.Sprintf("%d.%d.%d.%d", rand.Intn(256), rand.Intn(256), rand.Intn(256), rand.Intn(256))
		parameters.IPAddress = &randomIP
	}

	fAPC.FakeStore[resourceGroupName][publicIPAddressName] = parameters
	result = fAPC.FakeStore[resourceGroupName][publicIPAddressName]
	err = nil
	return resultChan, errChan
}

// Delete removes the public IP from the fake store, reporting 202 Accepted on
// success and a 404 DetailedError when the resource group or IP is unknown.
func (fAPC fakeAzurePIPClient) Delete(resourceGroupName string, publicIPAddressName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
	fAPC.mutex.Lock()
	defer fAPC.mutex.Unlock()
	respChan := make(chan autorest.Response, 1)
	errChan := make(chan error, 1)
	var resp autorest.Response
	var err error
	defer func() {
		respChan <- resp
		errChan <- err
		close(respChan)
		close(errChan)
	}()
	if _, ok := fAPC.FakeStore[resourceGroupName]; ok {
		if _, ok := fAPC.FakeStore[resourceGroupName][publicIPAddressName]; ok {
			delete(fAPC.FakeStore[resourceGroupName], publicIPAddressName)
			resp.Response = &http.Response{
				StatusCode: http.StatusAccepted,
			}
			err = nil
			return respChan, errChan
		}
	}
	resp.Response = &http.Response{
		StatusCode: http.StatusNotFound,
	}
	err = autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such PIP",
	}
	return respChan, errChan
}

// Get returns the stored public IP, or a 404 DetailedError when absent.
func (fAPC fakeAzurePIPClient) Get(resourceGroupName string, publicIPAddressName string, expand string) (result network.PublicIPAddress, err error) {
	fAPC.mutex.Lock()
	defer fAPC.mutex.Unlock()
	if _, ok := fAPC.FakeStore[resourceGroupName]; ok {
		if entity, ok := fAPC.FakeStore[resourceGroupName][publicIPAddressName]; ok {
			return entity, nil
		}
	}
	return result, autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such PIP",
	}
}

// ListNextResults fakes paging; the fake store never pages, so this returns an
// empty result.
func (fAPC fakeAzurePIPClient) ListNextResults(lastResults network.PublicIPAddressListResult) (result network.PublicIPAddressListResult, err error) {
	fAPC.mutex.Lock()
	defer fAPC.mutex.Unlock()
	return network.PublicIPAddressListResult{}, nil
}

// List returns every public IP in the resource group in a single OK page.
// Map iteration order is random, so result ordering is not deterministic.
func (fAPC fakeAzurePIPClient) List(resourceGroupName string) (result network.PublicIPAddressListResult, err error) {
	fAPC.mutex.Lock()
	defer fAPC.mutex.Unlock()
	var value []network.PublicIPAddress
	if _, ok := fAPC.FakeStore[resourceGroupName]; ok {
		for _, v := range fAPC.FakeStore[resourceGroupName] {
			value = append(value, v)
		}
	}
	result.Response.Response = &http.Response{
		StatusCode: http.StatusOK,
	}
	result.NextLink = nil
	result.Value = &value
	return result, nil
}

// fakeInterfacesClient is an in-memory fake of the Azure network-interface
// client. FakeStore maps resource group name -> NIC name -> resource.
type fakeInterfacesClient struct {
	mutex     *sync.Mutex
	FakeStore map[string]map[string]network.Interface
}

// NewFakeInterfacesClient returns an empty fake interfaces client.
func NewFakeInterfacesClient() fakeInterfacesClient {
	fIC := fakeInterfacesClient{}
	fIC.FakeStore = make(map[string]map[string]network.Interface)
	fIC.mutex = &sync.Mutex{}

	return fIC
}

// CreateOrUpdate stores the NIC and echoes it back over buffered channels,
// mimicking the async SDK signature without blocking.
func (fIC fakeInterfacesClient) CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error) {
	fIC.mutex.Lock()
	defer fIC.mutex.Unlock()
	resultChan := make(chan network.Interface, 1)
	errChan := make(chan error, 1)
	var result network.Interface
	var err error
	defer func() {
		resultChan <- result
		errChan <- err
		close(resultChan)
		close(errChan)
	}()
	if _, ok := fIC.FakeStore[resourceGroupName]; !ok {
		fIC.FakeStore[resourceGroupName] = make(map[string]network.Interface)
	}
	fIC.FakeStore[resourceGroupName][networkInterfaceName] = parameters
	result = fIC.FakeStore[resourceGroupName][networkInterfaceName]
	err = nil

	return resultChan, errChan
}

// Get returns the stored NIC, or a 404 DetailedError when absent.
func (fIC fakeInterfacesClient) Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error) {
	fIC.mutex.Lock()
	defer fIC.mutex.Unlock()
	if _, ok := fIC.FakeStore[resourceGroupName]; ok {
		if entity, ok := fIC.FakeStore[resourceGroupName][networkInterfaceName]; ok {
			return entity, nil
		}
	}
	return result, autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such Interface",
	}
}

// fakeVirtualMachinesClient is an in-memory fake of the Azure VM client.
// FakeStore maps resource group name -> VM name -> resource.
type fakeVirtualMachinesClient struct {
	mutex     *sync.Mutex
	FakeStore map[string]map[string]compute.VirtualMachine
}

// NewFakeVirtualMachinesClient returns an empty fake VM client.
func NewFakeVirtualMachinesClient() fakeVirtualMachinesClient {
	fVMC := fakeVirtualMachinesClient{}
	fVMC.FakeStore = make(map[string]map[string]compute.VirtualMachine)
	fVMC.mutex = &sync.Mutex{}
	return fVMC
}

// CreateOrUpdate stores the VM and echoes it back over buffered channels,
// mimicking the async SDK signature without blocking.
func (fVMC fakeVirtualMachinesClient) CreateOrUpdate(resourceGroupName string, VMName string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error) {
	fVMC.mutex.Lock()
	defer fVMC.mutex.Unlock()
	resultChan := make(chan compute.VirtualMachine, 1)
	errChan := make(chan error, 1)
	var result compute.VirtualMachine
	var err error
	defer func() {
		resultChan <- result
		errChan <- err
		close(resultChan)
		close(errChan)
	}()
	if _, ok := fVMC.FakeStore[resourceGroupName]; !ok {
		fVMC.FakeStore[resourceGroupName] = make(map[string]compute.VirtualMachine)
	}
	fVMC.FakeStore[resourceGroupName][VMName] = parameters
	result = fVMC.FakeStore[resourceGroupName][VMName]
	err = nil
	return resultChan, errChan
}

// Get returns the stored VM, or a 404 DetailedError when absent.
func (fVMC fakeVirtualMachinesClient) Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error) {
	fVMC.mutex.Lock()
	defer fVMC.mutex.Unlock()
	if _, ok := fVMC.FakeStore[resourceGroupName]; ok {
		if entity, ok := fVMC.FakeStore[resourceGroupName][VMName]; ok {
			return entity, nil
		}
	}
	return result, autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such VM",
	}
}

// List returns every VM in the resource group in a single OK page.
// Map iteration order is random, so result ordering is not deterministic.
func (fVMC fakeVirtualMachinesClient) List(resourceGroupName string) (result compute.VirtualMachineListResult, err error) {
	fVMC.mutex.Lock()
	defer fVMC.mutex.Unlock()
	var value []compute.VirtualMachine
	if _, ok := fVMC.FakeStore[resourceGroupName]; ok {
		for _, v := range fVMC.FakeStore[resourceGroupName] {
			value = append(value, v)
		}
	}
	result.Response.Response = &http.Response{
		StatusCode: http.StatusOK,
	}
	result.NextLink = nil
	result.Value = &value
	return result, nil
}

// ListNextResults fakes paging; the fake store never pages, so this returns an
// empty result.
func (fVMC fakeVirtualMachinesClient) ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error) {
	fVMC.mutex.Lock()
	defer fVMC.mutex.Unlock()
	return compute.VirtualMachineListResult{}, nil
}

// fakeAzureSubnetsClient is an in-memory fake of the Azure subnets client.
// FakeStore is keyed by "<resourceGroup>AND<vnet>" -> subnet name -> resource,
// since subnets are scoped to a virtual network.
type fakeAzureSubnetsClient struct {
	mutex     *sync.Mutex
	FakeStore map[string]map[string]network.Subnet
}

// NewFakeAzureSubnetsClient returns an empty fake subnets client.
func NewFakeAzureSubnetsClient() fakeAzureSubnetsClient {
	fASC := fakeAzureSubnetsClient{}
	fASC.FakeStore = make(map[string]map[string]network.Subnet)
	fASC.mutex = &sync.Mutex{}
	return fASC
}

// CreateOrUpdate stores the subnet under the composite rg+vnet key and echoes
// it back over buffered channels, mimicking the async SDK signature.
func (fASC fakeAzureSubnetsClient) CreateOrUpdate(resourceGroupName string, virtualNetworkName string, subnetName string, subnetParameters network.Subnet, cancel <-chan struct{}) (<-chan network.Subnet, <-chan error) {
	fASC.mutex.Lock()
	defer fASC.mutex.Unlock()
	resultChan := make(chan network.Subnet, 1)
	errChan := make(chan error, 1)
	var result network.Subnet
	var err error
	defer func() {
		resultChan <- result
		errChan <- err
		close(resultChan)
		close(errChan)
	}()
	rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
	if _, ok := fASC.FakeStore[rgVnet]; !ok {
		fASC.FakeStore[rgVnet] = make(map[string]network.Subnet)
	}
	fASC.FakeStore[rgVnet][subnetName] = subnetParameters
	result = fASC.FakeStore[rgVnet][subnetName]
	err = nil
	return resultChan, errChan
}

// Delete removes the subnet from the fake store, reporting 202 Accepted on
// success and a 404 DetailedError when the rg/vnet/subnet is unknown.
func (fASC fakeAzureSubnetsClient) Delete(resourceGroupName string, virtualNetworkName string, subnetName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
	fASC.mutex.Lock()
	defer fASC.mutex.Unlock()
	respChan := make(chan autorest.Response, 1)
	errChan := make(chan error, 1)
	var resp autorest.Response
	var err error
	defer func() {
		respChan <- resp
		errChan <- err
		close(respChan)
		close(errChan)
	}()

	rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
	if _, ok := fASC.FakeStore[rgVnet]; ok {
		if _, ok := fASC.FakeStore[rgVnet][subnetName]; ok {
			delete(fASC.FakeStore[rgVnet], subnetName)
			resp.Response = &http.Response{
				StatusCode: http.StatusAccepted,
			}
			err = nil
			return respChan, errChan
		}
	}
	resp.Response = &http.Response{
		StatusCode: http.StatusNotFound,
	}
	err = autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such Subnet",
	}
	return respChan, errChan
}

// Get returns the stored subnet, or a 404 DetailedError when absent.
func (fASC fakeAzureSubnetsClient) Get(resourceGroupName string, virtualNetworkName string, subnetName string, expand string) (result network.Subnet, err error) {
	fASC.mutex.Lock()
	defer fASC.mutex.Unlock()
	rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
	if _, ok := fASC.FakeStore[rgVnet]; ok {
		if entity, ok := fASC.FakeStore[rgVnet][subnetName]; ok {
			return entity, nil
		}
	}
	return result, autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such Subnet",
	}
}

// List returns every subnet of the given vnet in a single OK page.
// Map iteration order is random, so result ordering is not deterministic.
func (fASC fakeAzureSubnetsClient) List(resourceGroupName string, virtualNetworkName string) (result network.SubnetListResult, err error) {
	fASC.mutex.Lock()
	defer fASC.mutex.Unlock()
	rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
	var value []network.Subnet
	if _, ok := fASC.FakeStore[rgVnet]; ok {
		for _, v := range fASC.FakeStore[rgVnet] {
			value = append(value, v)
		}
	}
	result.Response.Response = &http.Response{
		StatusCode: http.StatusOK,
	}
	result.NextLink = nil
	result.Value = &value
	return result, nil
}

// fakeAzureNSGClient is an in-memory fake of the Azure network-security-group
// client. FakeStore maps resource group name -> NSG name -> resource.
type fakeAzureNSGClient struct {
	mutex     *sync.Mutex
	FakeStore map[string]map[string]network.SecurityGroup
}

// NewFakeAzureNSGClient returns an empty fake NSG client.
func NewFakeAzureNSGClient() fakeAzureNSGClient {
	fNSG := fakeAzureNSGClient{}
	fNSG.FakeStore = make(map[string]map[string]network.SecurityGroup)
	fNSG.mutex = &sync.Mutex{}
	return fNSG
}

// CreateOrUpdate stores the NSG and echoes it back over buffered channels,
// mimicking the async SDK signature without blocking.
func (fNSG fakeAzureNSGClient) CreateOrUpdate(resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, cancel <-chan struct{}) (<-chan network.SecurityGroup, <-chan error) {
	fNSG.mutex.Lock()
	defer fNSG.mutex.Unlock()
	resultChan := make(chan network.SecurityGroup, 1)
	errChan := make(chan error, 1)
	var result network.SecurityGroup
	var err error
	defer func() {
		resultChan <- result
		errChan <- err
		close(resultChan)
		close(errChan)
	}()
	if _, ok := fNSG.FakeStore[resourceGroupName]; !ok {
		fNSG.FakeStore[resourceGroupName] = make(map[string]network.SecurityGroup)
	}
	fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] = parameters
	result = fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]
	err = nil
	return resultChan, errChan
}

// Delete removes the NSG from the fake store, reporting 202 Accepted on
// success and a 404 DetailedError when the resource group or NSG is unknown.
func (fNSG fakeAzureNSGClient) Delete(resourceGroupName string, networkSecurityGroupName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
	fNSG.mutex.Lock()
	defer fNSG.mutex.Unlock()
	respChan := make(chan autorest.Response, 1)
	errChan := make(chan error, 1)
	var resp autorest.Response
	var err error
	defer func() {
		respChan <- resp
		errChan <- err
		close(respChan)
		close(errChan)
	}()
	if _, ok := fNSG.FakeStore[resourceGroupName]; ok {
		if _, ok := fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]; ok {
			delete(fNSG.FakeStore[resourceGroupName], networkSecurityGroupName)
			resp.Response = &http.Response{
				StatusCode: http.StatusAccepted,
			}
			err = nil
			return respChan, errChan
		}
	}
	resp.Response = &http.Response{
		StatusCode: http.StatusNotFound,
	}
	err = autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such NSG",
	}
	return respChan, errChan
}

// Get returns the stored NSG, or a 404 DetailedError when absent.
func (fNSG fakeAzureNSGClient) Get(resourceGroupName string, networkSecurityGroupName string, expand string) (result network.SecurityGroup, err error) {
	fNSG.mutex.Lock()
	defer fNSG.mutex.Unlock()
	if _, ok := fNSG.FakeStore[resourceGroupName]; ok {
		if entity, ok := fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]; ok {
			return entity, nil
		}
	}
	return result, autorest.DetailedError{
		StatusCode: http.StatusNotFound,
		Message:    "Not such NSG",
	}
}

// List returns every NSG in the resource group in a single OK page.
// Map iteration order is random, so result ordering is not deterministic.
func (fNSG fakeAzureNSGClient) List(resourceGroupName string) (result network.SecurityGroupListResult, err error) {
	fNSG.mutex.Lock()
	defer fNSG.mutex.Unlock()
	var value []network.SecurityGroup
	if _, ok := fNSG.FakeStore[resourceGroupName]; ok {
		for _, v := range fNSG.FakeStore[resourceGroupName] {
			value = append(value, v)
		}
	}
	result.Response.Response = &http.Response{
		StatusCode: http.StatusOK,
	}
	result.NextLink = nil
	result.Value = &value
	return result, nil
}
// ServiceAnnotationLoadBalancerMode is the annotation used on the service to specify the
// Azure load balancer selection based on availability sets
const ServiceAnnotationLoadBalancerMode = "service.beta.kubernetes.io/azure-load-balancer-mode"

// ServiceAnnotationLoadBalancerAutoModeValue is the annotation value used on the service to
// request automatic Azure load balancer selection from the availability sets
const ServiceAnnotationLoadBalancerAutoModeValue = "__auto__"

// ServiceAnnotationDNSLabelName is the annotation specifying the DNS label name for the service.
const ServiceAnnotationDNSLabelName = "service.beta.kubernetes.io/azure-dns-label-name"

// GetLoadBalancer returns whether the specified load balancer exists, and
// if so, what its status is.
func (az *Cloud) GetLoadBalancer(clusterName string, service *v1.Service) (status *v1.LoadBalancerStatus, exists bool, err error) {
	_, status, exists, err = az.getServiceLoadBalancer(service, clusterName, nil, false)
	if err != nil {
		return nil, false, err
	}
	if exists == false {
		serviceName := getServiceName(service)
		glog.V(5).Infof("getloadbalancer (cluster:%s) (service:%s)- IP doesn't exist in any of the lbs", clusterName, serviceName)
		// NOTE(review): returns an error (not nil) when the LB is absent —
		// callers treating "not found" as a non-error should be aware.
		return nil, false, fmt.Errorf("Service(%s) - Loadbalancer not found", serviceName)
	}
	return status, true, nil
}

// EnsureLoadBalancer creates a new load balancer 'name', or updates the existing one. Returns the status of the balancer
func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) {
	// When a client updates the internal load balancer annotation,
	// the service may be switched from an internal LB to a public one, or vice versa.
	// Here we'll first ensure the service does not reside in the opposite LB:
	// reconcile a copy of the service with the annotation flipped and wantLb=false
	// so any stale frontend config on the other LB is removed.
	serviceName := getServiceName(service)
	glog.V(5).Infof("ensureloadbalancer(%s): START clusterName=%q", serviceName, clusterName)
	flipedService := flipServiceInternalAnnotation(service)
	if _, err := az.reconcileLoadBalancer(clusterName, flipedService, nil, false /* wantLb */); err != nil {
		return nil, err
	}

	if _, err := az.reconcilePublicIP(clusterName, service, true /* wantLb */); err != nil {
		return nil, err
	}

	lb, err := az.reconcileLoadBalancer(clusterName, service, nodes, true /* wantLb */)
	if err != nil {
		return nil, err
	}

	lbStatus, err := az.getServiceLoadBalancerStatus(service, lb)
	if err != nil {
		return nil, err
	}

	if _, err := az.reconcileSecurityGroup(clusterName, service, lbStatus, true /* wantLb */); err != nil {
		return nil, err
	}

	return lbStatus, nil
}

// EnsureLoadBalancerDeleted deletes the specified load balancer if it
// exists, returning nil if the load balancer specified either didn't exist or
// was successfully deleted.
// This construction is useful because many cloud providers' load balancers
// have multiple underlying components, meaning a Get could say that the LB
// doesn't exist even if some part of it is still laying around.
func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Service) error {
	serviceName := getServiceName(service)
	glog.V(5).Infof("delete(%s): START clusterName=%q", serviceName, clusterName)
	// Tear down in dependency order: security group rules, then the LB
	// frontend/rules, then the public IP (all with wantLb=false).
	if _, err := az.reconcileSecurityGroup(clusterName, service, nil, false /* wantLb */); err != nil {
		return err
	}

	if _, err := az.reconcileLoadBalancer(clusterName, service, nil, false /* wantLb */); err != nil {
		return err
	}

	if _, err := az.reconcilePublicIP(clusterName, service, false /* wantLb */); err != nil {
		return err
	}

	glog.V(2).Infof("delete(%s): FINISH", serviceName)
	return nil
}

// getServiceLoadBalancer gets the loadbalancer for the service if it already exists
// If wantLb is TRUE then it selects a new load balancer
// In case the selected load balancer does not exist it returns a network.LoadBalancer struct
// with added metadata (such as name, location) and existsLB set to FALSE
// By default - the cluster default LB is returned
func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string, nodes []*v1.Node, wantLb bool) (lb *network.LoadBalancer, status *v1.LoadBalancerStatus, exists bool, err error) {
	isInternal := requiresInternalLoadBalancer(service)
	var defaultLB *network.LoadBalancer
	defaultLBName := az.getLoadBalancerName(clusterName, az.Config.PrimaryAvailabilitySetName, isInternal)

	lbs, err := az.ListLBWithRetry()
	if err != nil {
		return nil, nil, false, err
	}
	if lbs != nil {
		for lbx := range lbs {
			// take address of the slice element so defaultLB aliases the list entry
			lb := &(lbs[lbx])
			if strings.EqualFold(*lb.Name, defaultLBName) {
				defaultLB = lb
			}
			if isInternalLoadBalancer(lb) != isInternal {
				continue
			}
			status, err = az.getServiceLoadBalancerStatus(service, lb)
			if err != nil {
				return nil, nil, false, err
			}
			if status == nil {
				// service is not on this load balancer
				continue
			}

			return lb, status, true, nil
		}
	}
	// service does not have a load balancer, select one
	if wantLb {
		// select new load balancer for service
		lb, exists, err = az.selectLoadBalancer(clusterName, service, &lbs, nodes)
		if err != nil {
			return nil, nil, false, err
		}

		return lb, nil, exists, err
	}
	if defaultLB == nil {
		defaultLB = &network.LoadBalancer{
			Name:                         &defaultLBName,
			Location:                     &az.Location,
			LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{},
		}
	}

	return defaultLB, nil, false, nil
}

// getServiceLoadBalancerStatus returns the ingress status (frontend IP) of the
// service on the given load balancer, or (nil, nil) when the service's
// frontend IP configuration is not present on that LB.
func (az *Cloud) getServiceLoadBalancerStatus(service *v1.Service, lb *network.LoadBalancer) (status *v1.LoadBalancerStatus, err error) {
	if lb == nil {
		glog.V(10).Infof("getServiceLoadBalancerStatus lb is nil")
		return nil, nil
	}
	if lb.FrontendIPConfigurations == nil || *lb.FrontendIPConfigurations == nil {
		return nil, nil
	}
	isInternal := requiresInternalLoadBalancer(service)
	lbFrontendIPConfigName := getFrontendIPConfigName(service, subnet(service))
	serviceName := getServiceName(service)
	for _, ipConfiguration := range *lb.FrontendIPConfigurations {
		if lbFrontendIPConfigName == *ipConfiguration.Name {
			var lbIP *string
			if isInternal {
				// internal LB: the private address is stored directly on the config
				lbIP = ipConfiguration.PrivateIPAddress
			} else {
				// public LB: resolve the referenced public IP resource by name
				if ipConfiguration.PublicIPAddress == nil {
					return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress is Nil", serviceName, *lb.Name)
				}
				pipID := ipConfiguration.PublicIPAddress.ID
				if pipID == nil {
					return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress ID is Nil", serviceName, *lb.Name)
				}
				pipName, err := getLastSegment(*pipID)
				if err != nil {
					return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress Name from ID(%s)", serviceName, *lb.Name, *pipID)
				}
				pip, existsPip, err := az.getPublicIPAddress(pipName)
				if err != nil {
					return nil, err
				}
				if existsPip {
					lbIP = pip.IPAddress
				}
			}
			return &v1.LoadBalancerStatus{Ingress: []v1.LoadBalancerIngress{{IP: *lbIP}}}, nil
		}
	}

	return nil, nil
}

// determinePublicIPName resolves the name of the public IP resource to use for
// the service: the managed default name when no LoadBalancerIP is requested,
// otherwise the existing public IP resource holding that address (error if none).
func (az *Cloud) determinePublicIPName(clusterName string, service *v1.Service) (string, error) {
	loadBalancerIP := service.Spec.LoadBalancerIP
	if len(loadBalancerIP) == 0 {
		return getPublicIPName(clusterName, service), nil
	}

	pips, err := az.ListPIPWithRetry()
	if err != nil {
		return "", err
	}

	for _, pip := range pips {
		if pip.PublicIPAddressPropertiesFormat.IPAddress != nil &&
			*pip.PublicIPAddressPropertiesFormat.IPAddress == loadBalancerIP {
			return *pip.Name, nil
		}
	}
	return "", fmt.Errorf("user supplied IP Address %s was not found", loadBalancerIP)
}

// flipServiceInternalAnnotation returns a deep copy of the service with the
// internal-LB annotation toggled (removed if present, set to "true" if absent).
// Used to clean up the opposite LB when a service switches internal/public.
func flipServiceInternalAnnotation(service *v1.Service) *v1.Service {
	copyService := service.DeepCopy()
	if _, ok := copyService.Annotations[ServiceAnnotationLoadBalancerInternal]; ok {
		delete(copyService.Annotations, ServiceAnnotationLoadBalancerInternal)
	} else {
		copyService.Annotations[ServiceAnnotationLoadBalancerInternal] = "true"
	}
	return copyService
}
az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%q): start", *pip.Name) @@ -523,44 +315,27 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName, domainNameLabel stri } return &pip, nil - -} - -func (az *Cloud) ensurePublicIPDeleted(serviceName, pipName string) error { - glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName) - az.operationPollRateLimiter.Accept() - glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): start", pipName) - resp, deleteErrChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) - deleteErr := <-deleteErrChan - glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): end", pipName) // response not read yet... - if az.CloudProviderBackoff && shouldRetryAPIRequest(<-resp, deleteErr) { - glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName) - retryErr := az.DeletePublicIPWithRetry(pipName) - if retryErr != nil { - glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName) - return retryErr - } - } - _, realErr := checkResourceExistsFromError(deleteErr) - if realErr != nil { - return nil - } - return nil } // This ensures load balancer exists and the frontend ip config is setup. // This also reconciles the Service's Ports with the LoadBalancer config. // This entails adding rules/probes for expected Ports and removing stale rules/ports. 
-func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfigurationProperties *network.FrontendIPConfigurationPropertiesFormat, clusterName string, service *v1.Service, nodes []*v1.Node) (network.LoadBalancer, bool, error) { +// nodes only used if wantLB is true +func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service, nodes []*v1.Node, wantLb bool) (*network.LoadBalancer, error) { isInternal := requiresInternalLoadBalancer(service) - lbName := getLoadBalancerName(clusterName, isInternal) serviceName := getServiceName(service) + glog.V(2).Infof("reconcileLoadBalancer(%s) - wantLB(%t): started", serviceName, wantLb) + lb, _, _, err := az.getServiceLoadBalancer(service, clusterName, nodes, wantLb) + if err != nil { + return nil, err + } + lbName := *lb.Name + glog.V(2).Infof("reconcileLoadBalancer(%s): lb(%s) wantLB(%t) resolved load balancer name", serviceName, lbName, wantLb) lbFrontendIPConfigName := getFrontendIPConfigName(service, subnet(service)) lbFrontendIPConfigID := az.getFrontendIPConfigID(lbName, lbFrontendIPConfigName) lbBackendPoolName := getBackendPoolName(clusterName) lbBackendPoolID := az.getBackendPoolID(lbName, lbBackendPoolName) - wantLb := fipConfigurationProperties != nil dirtyLb := false // Ensure LoadBalancer's Backend Pool Configuration @@ -597,6 +372,7 @@ func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfiguration if lb.FrontendIPConfigurations != nil { newConfigs = *lb.FrontendIPConfigurations } + if !wantLb { for i := len(newConfigs) - 1; i >= 0; i-- { config := newConfigs[i] @@ -625,6 +401,51 @@ func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfiguration } } if !foundConfig { + // construct FrontendIPConfigurationPropertiesFormat + var fipConfigurationProperties *network.FrontendIPConfigurationPropertiesFormat + if isInternal { + subnetName := subnet(service) + if subnetName == nil { + subnetName = &az.SubnetName + } + subnet, existsSubnet, err := 
az.getSubnet(az.VnetName, *subnetName) + if err != nil { + return nil, err + } + + if !existsSubnet { + return nil, fmt.Errorf("ensure(%s): lb(%s) - failed to get subnet: %s/%s", serviceName, lbName, az.VnetName, az.SubnetName) + } + + configProperties := network.FrontendIPConfigurationPropertiesFormat{ + Subnet: &subnet, + } + + loadBalancerIP := service.Spec.LoadBalancerIP + if loadBalancerIP != "" { + configProperties.PrivateIPAllocationMethod = network.Static + configProperties.PrivateIPAddress = &loadBalancerIP + } else { + // We'll need to call GetLoadBalancer later to retrieve allocated IP. + configProperties.PrivateIPAllocationMethod = network.Dynamic + } + + fipConfigurationProperties = &configProperties + } else { + pipName, err := az.determinePublicIPName(clusterName, service) + if err != nil { + return nil, err + } + domainNameLabel := getPublicIPLabel(service) + pip, err := az.ensurePublicIPExists(serviceName, pipName, domainNameLabel) + if err != nil { + return nil, err + } + fipConfigurationProperties = &network.FrontendIPConfigurationPropertiesFormat{ + PublicIPAddress: &network.PublicIPAddress{ID: pip.ID}, + } + } + newConfigs = append(newConfigs, network.FrontendIPConfiguration{ Name: to.StringPtr(lbFrontendIPConfigName), @@ -654,7 +475,7 @@ func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfiguration transportProto, _, probeProto, err := getProtocolsFromKubernetesProtocol(port.Protocol) if err != nil { - return lb, false, err + return nil, err } if serviceapi.NeedsHealthCheck(service) { @@ -662,7 +483,7 @@ func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfiguration // ERROR: this isn't supported // health check (aka source ip preservation) is not // compatible with UDP (it uses an HTTP check) - return lb, false, fmt.Errorf("services requiring health checks are incompatible with UDP ports") + return nil, fmt.Errorf("services requiring health checks are incompatible with UDP ports") } podPresencePath, 
podPresencePort := serviceapi.GetServiceHealthCheckPathPort(service) @@ -803,24 +624,115 @@ func (az *Cloud) reconcileLoadBalancer(lb network.LoadBalancer, fipConfiguration lb.LoadBalancingRules = &updatedRules } - return lb, dirtyLb, nil + // We don't care if the LB exists or not + // We only care about if there is any change in the LB, which means dirtyLB + // If it is not exist, and no change to that, we don't CreateOrUpdate LB + if dirtyLb { + if lb.FrontendIPConfigurations == nil || len(*lb.FrontendIPConfigurations) == 0 { + // When FrontendIPConfigurations is empty, we need to delete the Azure LoadBalancer resource itself + // Because delete all FrontendIPConfigurations in LB is not supported, we have to delete the LB itself + glog.V(3).Infof("delete(%s): lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) + + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("LoadBalancerClient.Delete(%q): start", lbName) + respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil) + resp := <-respChan + err := <-errChan + glog.V(10).Infof("LoadBalancerClient.Delete(%q): end", lbName) + if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) { + glog.V(2).Infof("delete(%s) backing off: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) + retryErr := az.DeleteLBWithRetry(lbName) + if retryErr != nil { + err = retryErr + glog.V(2).Infof("delete(%s) abort backoff: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) + } + } + if err != nil { + return nil, err + } + + } else { + glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName) + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%q): start", lbName) + respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, lbName, *lb, nil) + resp := <-respChan + err := <-errChan + glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%q): end", lbName) + if 
az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) { + glog.V(2).Infof("ensure(%s) backing off: lb(%s) - updating", serviceName, lbName) + retryErr := az.CreateOrUpdateLBWithRetry(*lb) + if retryErr != nil { + glog.V(2).Infof("ensure(%s) abort backoff: lb(%s) - updating", serviceName, lbName) + return nil, retryErr + } + } + if err != nil { + return nil, err + } + } + } + + if wantLb && nodes != nil { + // Add the machines to the backend pool if they're not already + availabilitySetName := az.mapLoadBalancerNameToAvailabilitySet(lbName, clusterName) + hostUpdates := make([]func() error, len(nodes)) + for i, node := range nodes { + localNodeName := node.Name + f := func() error { + err := az.ensureHostInPool(serviceName, types.NodeName(localNodeName), lbBackendPoolID, availabilitySetName) + if err != nil { + return fmt.Errorf("ensure(%s): lb(%s) - failed to ensure host in pool: %q", serviceName, lbName, err) + } + return nil + } + hostUpdates[i] = f + } + + errs := utilerrors.AggregateGoroutines(hostUpdates...) + if errs != nil { + return nil, utilerrors.Flatten(errs) + } + } + + glog.V(2).Infof("ensure(%s): lb(%s) finished", serviceName, lbName) + return lb, nil } // This reconciles the Network Security Group similar to how the LB is reconciled. // This entails adding required, missing SecurityRules and removing stale rules. 
-func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName string, service *v1.Service, lbIP *string, wantLb bool) (network.SecurityGroup, bool, error) { +func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, lbStatus *v1.LoadBalancerStatus, wantLb bool) (*network.SecurityGroup, error) { serviceName := getServiceName(service) + glog.V(5).Infof("ensure(%s): START clusterName=%q lbName=%q", serviceName, clusterName) + var ports []v1.ServicePort if wantLb { ports = service.Spec.Ports } else { ports = []v1.ServicePort{} } + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("SecurityGroupsClient.Get(%q): start", az.SecurityGroupName) + sg, err := az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "") + glog.V(10).Infof("SecurityGroupsClient.Get(%q): end", az.SecurityGroupName) + if err != nil { + return nil, err + } + + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("SecurityGroupsClient.Get(%q): start", az.SecurityGroupName) + sg, err = az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "") + glog.V(10).Infof("SecurityGroupsClient.Get(%q): end", az.SecurityGroupName) + if err != nil { + return nil, err + } destinationIPAddress := "" if wantLb { + // Get lbIP since we make up NSG rules based on ingress IP + lbIP := &lbStatus.Ingress[0].IP if lbIP == nil { - return sg, false, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name) + return &sg, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name) } destinationIPAddress = *lbIP } @@ -830,7 +742,7 @@ func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName st sourceRanges, err := serviceapi.GetLoadBalancerSourceRanges(service) if err != nil { - return sg, false, err + return nil, err } var sourceAddressPrefixes []string if sourceRanges == nil || serviceapi.IsAllowAll(sourceRanges) { @@ -847,7 +759,7 @@ func (az *Cloud) 
reconcileSecurityGroup(sg network.SecurityGroup, clusterName st for i, port := range ports { _, securityProto, _, err := getProtocolsFromKubernetesProtocol(port.Protocol) if err != nil { - return sg, false, err + return nil, err } for j := range sourceAddressPrefixes { ix := i*len(sourceAddressPrefixes) + j @@ -902,7 +814,7 @@ func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName st nextAvailablePriority, err := getNextAvailablePriority(updatedRules) if err != nil { - return sg, false, err + return nil, err } expectedRule.Priority = to.Int32Ptr(nextAvailablePriority) @@ -912,8 +824,90 @@ func (az *Cloud) reconcileSecurityGroup(sg network.SecurityGroup, clusterName st } if dirtySg { sg.SecurityRules = &updatedRules + glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name) + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): start", *sg.Name) + respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil) + resp := <-respChan + err := <-errChan + glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): end", *sg.Name) + if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) { + glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name) + retryErr := az.CreateOrUpdateSGWithRetry(sg) + if retryErr != nil { + glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name) + return nil, retryErr + } + } + if err != nil { + return nil, err + } } - return sg, dirtySg, nil + return &sg, nil +} + +// This reconciles the PublicIP resources similar to how the LB is reconciled. +// This entails adding required, missing SecurityRules and removing stale rules. 
+func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, wantLb bool) (*network.PublicIPAddress, error) { + isInternal := requiresInternalLoadBalancer(service) + serviceName := getServiceName(service) + desiredPipName, err := az.determinePublicIPName(clusterName, service) + if err != nil { + return nil, err + } + + pips, err := az.ListPIPWithRetry() + if err != nil { + return nil, err + } + + for _, pip := range pips { + if pip.Tags != nil && + (*pip.Tags)["service"] != nil && + *(*pip.Tags)["service"] == serviceName { + // We need to process for pips belong to this service + pipName := *pip.Name + if wantLb && !isInternal && pipName == desiredPipName { + // This is the only case we should preserve the + // Public ip resource with match service tag + // We could do nothing here, we will ensure that out of the loop + } else { + // We use tag to decide which IP should be removed + glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName) + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): start", pipName) + resp, deleteErrChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) + deleteErr := <-deleteErrChan + glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): end", pipName) // response not read yet... 
+ if az.CloudProviderBackoff && shouldRetryAPIRequest(<-resp, deleteErr) { + glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName) + retryErr := az.DeletePublicIPWithRetry(pipName) + if retryErr != nil { + glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName) + return nil, retryErr + } + } + + deleteErr = ignoreStatusNotFoundFromError(deleteErr) + if deleteErr != nil { + return nil, deleteErr + } + glog.V(2).Infof("ensure(%s): pip(%s) - finished", serviceName, pipName) + } + } + + } + + if !isInternal && wantLb { + // Confirm desired public ip resource exists + var rpip *network.PublicIPAddress + domainNameLabel := getPublicIPLabel(service) + if rpip, err = az.ensurePublicIPExists(serviceName, desiredPipName, domainNameLabel); err != nil { + return nil, err + } + return rpip, nil + } + return nil, nil } func findProbe(probes []network.Probe, probe network.Probe) bool { @@ -945,7 +939,7 @@ func findSecurityRule(rules []network.SecurityRule, rule network.SecurityRule) b // This ensures the given VM's Primary NIC's Primary IP Configuration is // participating in the specified LoadBalancer Backend Pool. 
-func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string) error { +func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string, availabilitySetName string) error { var machine compute.VirtualMachine vmName := mapNodeNameToVMName(nodeName) az.operationPollRateLimiter.Accept() @@ -975,12 +969,12 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b } // Check availability set - if az.PrimaryAvailabilitySetName != "" { - expectedAvailabilitySetName := az.getAvailabilitySetID(az.PrimaryAvailabilitySetName) + if availabilitySetName != "" { + expectedAvailabilitySetName := az.getAvailabilitySetID(availabilitySetName) if machine.AvailabilitySet == nil || !strings.EqualFold(*machine.AvailabilitySet.ID, expectedAvailabilitySetName) { glog.V(3).Infof( - "nicupdate(%s): skipping nic (%s) since it is not in the primaryAvailabilitSet(%s)", - serviceName, nicName, az.PrimaryAvailabilitySetName) + "nicupdate(%s): skipping nic (%s) since it is not in the availabilitSet(%s)", + serviceName, nicName, availabilitySetName) return nil } } @@ -1058,3 +1052,16 @@ func subnet(service *v1.Service) *string { return nil } + +func getServiceLoadBalancerMode(service *v1.Service) (hasMode bool, isAuto bool, asl []string) { + mode, hasMode := service.Annotations[ServiceAnnotationLoadBalancerMode] + isAuto = strings.EqualFold(mode, ServiceAnnotationLoadBalancerAutoModeValue) + if !isAuto { + asTagList := strings.TrimSpace(mode) + + // Break up list of "AS1,AS2" + asl = strings.Split(asTagList, ",") + } + + return hasMode, isAuto, asl +} diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md new file mode 100644 index 00000000000..84a77a6784b --- /dev/null +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md @@ -0,0 +1,68 @@ +# Azure LoadBalancer + +The way azure define LoadBalancer is different with GCE 
or AWS. Azure's LB can have multiple frontend IP refs. The GCE and AWS can only allow one, if you want more, you'd better have another LB. Because of that fact, Public IP is not part of the LB in Azure. NSG is not part of LB in Azure as well. However, you cannot delete them in parallel, Public IP can only be deleted after LB's frontend IP ref is removed. + +For different Azure Resources, such as LB, Public IP, NSG, they are the same tier Azure resources. We need to make sure there is no connection in their own ensure loops. In other words, they would be eventually reconciled regardless of other resources' state. They should only depend on service state. + +Also, for Azure, we cannot afford to have more than 1 worker of service_controller. Because different services could operate on the same LB, concurrent execution could result in conflict or unexpected results. For AWS and GCE, they apparently don't have this problem, they use one LB per service, no such conflict. + +There are two load balancers per availability set: internal and external. There is a limit on the number of services that can be associated with a single load balancer. +By default the primary load balancer is selected. Services can be annotated to allow auto selection of available load balancers. Service annotations can also be used to provide specific availability sets that host the load balancers. Note that in case of auto selection or specific availability set selection, when the availability set is lost in case of downtime or cluster scale down, the services are currently not auto assigned to an available load balancer. +Service Annotation for Auto and specific load balancer mode + +- service.beta.kubernetes.io/azure-load-balancer-mode (__auto__|as1,as2...) 
+ +## Introduce Functions + +- reconcileLoadBalancer(lb network.LoadBalancer, clusterName string, service *v1.Service, nodes []*v1.Node, wantLB bool) (network.LoadBalancer, error) + - Go through lb's properties, update based on wantLB + - If any change on the lb, no matter if the lb exists or not + - Call az cloud to CreateOrUpdate on this lb, or Delete if nothing left + - return lb, err + +- reconcileSecurityGroup(sg network.SecurityGroup, clusterName string, service *v1.Service, wantLb bool) (network.SecurityGroup, error) + - Go through NSG's properties, update based on wantLB + - If any change on the NSG, (the NSG should always exist) + - Call az cloud to CreateOrUpdate on this NSG + - return sg, err + +- reconcilePublicIP(pipName string, clusterName string, service *v1.Service, wantLB bool) (error) + - if wantLB and external LB, + - ensure Azure Public IP resource is there + - when we ensure Public IP, it needs to be both Name and Tag match with the convention + - remove dangling Public IP that could have Name or Tag match with the service, but not both + - else, ensure Azure Public IP resource is not there + +- getServiceLoadBalancer(service *v1.Service, clusterName string, nodes []*v1.Node, wantLb bool) (lb, status, exists, error) + - gets the loadbalancer for the service if it already exists + - If wantLb is TRUE then it selects a new load balancer, the selection helps distribute the services across load balancers + - In case the selected load balancer does not exist it returns network.LoadBalancer struct with added metadata (such as name, location) and existsLB set to FALSE + - By default - cluster default LB is returned + +## Define interface behaviors + +### GetLoadBalancer + +- Get LoadBalancer status, return status, error + - If not exist, ensure it is there + +### EnsureLoadBalancer + +- Reconcile LB's related but not owned resources, such as Public IP, NSG rules + - Call reconcileSecurityGroup(sg, clusterName, service, true) + - Call 
reconcilePublicIP(pipName, cluster, service, true) +- Reconcile LB's related and owned resources, such as FrontEndIPConfig, Rules, Probe. + - Call reconcileLoadBalancer(lb, clusterName, service, nodes, true) + +### UpdateLoadBalancer + +- Has no difference from EnsureLoadBalancer + +### EnsureLoadBalancerDeleted + +- Reconcile NSG first, before reconciling LB, because SG needs LB to be there + - Call reconcileSecurityGroup(sg, clusterName, service, false) +- Reconcile LB's related and owned resources, such as FrontEndIPConfig, Rules, Probe. + - Call reconcileLoadBalancer(lb, clusterName, service, nodes, false) +- Reconcile LB's related but not owned resources, such as Public IP + - Call reconcilePublicIP(pipName, cluster, service, false) \ No newline at end of file diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index c364b7f4d8a..3bbdda0e7b3 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -26,9 +26,13 @@ import ( "testing" "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/flowcontrol" serviceapi "k8s.io/kubernetes/pkg/api/v1/service" + kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" + "github.com/Azure/azure-sdk-for-go/arm/compute" "github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/go-autorest/autorest/to" ) @@ -36,12 +40,10 @@ import ( var testClusterName = "testCluster" // Test additional of a new service/port. 
-func TestReconcileLoadBalancerAddPort(t *testing.T) { +func TestAddPort(t *testing.T) { az := getTestCloud() svc := getTestService("servicea", v1.ProtocolTCP, 80) - configProperties := getTestPublicFipConfigurationProperties() - lb := getTestLoadBalancer() - nodes := []*v1.Node{} + clusterResources := getClusterResources(az, 1, 1) svc.Spec.Ports = append(svc.Spec.Ports, v1.ServicePort{ Name: fmt.Sprintf("port-udp-%d", 1234), @@ -50,15 +52,11 @@ func TestReconcileLoadBalancerAddPort(t *testing.T) { NodePort: getBackendPort(1234), }) - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") - } - // ensure we got a frontend ip configuration if len(*lb.FrontendIPConfigurations) != 1 { t.Error("Expected the loadbalancer to have a frontend ip configuration") @@ -67,24 +65,302 @@ func TestReconcileLoadBalancerAddPort(t *testing.T) { validateLoadBalancer(t, lb, svc) } +func TestLoadBalancerInternalServiceModeSelection(t *testing.T) { + testLoadBalancerServiceDefaultModeSelection(t, true) + testLoadBalancerServiceAutoModeSelection(t, true) + testLoadBalancerServicesSpecifiedSelection(t, true) + testLoadBalancerMaxRulesServices(t, true) + testLoadBalancerServiceAutoModeDeleteSelection(t, true) +} + +func TestLoadBalancerExternalServiceModeSelection(t *testing.T) { + testLoadBalancerServiceDefaultModeSelection(t, false) + testLoadBalancerServiceAutoModeSelection(t, false) + testLoadBalancerServicesSpecifiedSelection(t, false) + testLoadBalancerMaxRulesServices(t, false) + testLoadBalancerServiceAutoModeDeleteSelection(t, false) +} + +func testLoadBalancerServiceDefaultModeSelection(t *testing.T, isInternal bool) { + az := getTestCloud() + const vmCount = 8 + const availabilitySetCount = 4 + const 
serviceCount = 9 + + clusterResources := getClusterResources(az, vmCount, availabilitySetCount) + getTestSecurityGroup(az) + + for index := 1; index <= serviceCount; index++ { + svcName := fmt.Sprintf("service-%d", index) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } else { + svc = getTestService(svcName, v1.ProtocolTCP, 8081) + } + + lbStatus, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + if lbStatus == nil { + t.Errorf("Unexpected error: %s", svcName) + } + + expectedLBName := testClusterName + if isInternal { + expectedLBName = testClusterName + "-internal" + } + + result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) + lb := (*result.Value)[0] + lbCount := len(*result.Value) + expectedNumOfLB := 1 + if lbCount != expectedNumOfLB { + t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLB, lbCount) + } + + if !strings.EqualFold(*lb.Name, expectedLBName) { + t.Errorf("lb name should be the default LB name Extected (%s) Fouund (%s)", expectedLBName, *lb.Name) + } + + ruleCount := len(*lb.LoadBalancingRules) + if ruleCount != index { + t.Errorf("lb rule could should be equal to nuber of services deployed, expected (%d) Found (%d)", index, ruleCount) + } + } +} + +// Validate even distribution of external services across load balances +// based on number of availability sets +func testLoadBalancerServiceAutoModeSelection(t *testing.T, isInternal bool) { + az := getTestCloud() + const vmCount = 8 + const availabilitySetCount = 4 + const serviceCount = 9 + + clusterResources := getClusterResources(az, vmCount, availabilitySetCount) + getTestSecurityGroup(az) + + for index := 1; index <= serviceCount; index++ { + svcName := fmt.Sprintf("service-%d", index) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } 
else { + svc = getTestService(svcName, v1.ProtocolTCP, 8081) + } + setLoadBalancerAutoModeAnnotation(&svc) + lbStatus, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + if lbStatus == nil { + t.Errorf("Unexpected error: %s", svcName) + } + + expectedNumOfLB := index % availabilitySetCount + if index >= availabilitySetCount { + expectedNumOfLB = availabilitySetCount + } + result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) + lbCount := len(*result.Value) + if lbCount != expectedNumOfLB { + t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLB, lbCount) + } + + maxRules := 0 + minRules := serviceCount + for x := range *result.Value { + lb := (*result.Value)[x] + ruleCount := len(*lb.LoadBalancingRules) + if ruleCount < minRules { + minRules = ruleCount + } + if ruleCount > maxRules { + maxRules = ruleCount + } + } + + delta := maxRules - minRules + if delta > 1 { + t.Errorf("Unexpected min or max rule in LB's in resource group: Service Index (%d) Min (%d) Max(%d)", index, minRules, maxRules) + } + } +} + +// Validate availability set selection of services across load balancers +// based on provided availability sets through service annotation +func testLoadBalancerServicesSpecifiedSelection(t *testing.T, isInternal bool) { + az := getTestCloud() + const vmCount = 8 + const availabilitySetCount = 4 + const serviceCount = 9 + + clusterResources := getClusterResources(az, vmCount, availabilitySetCount) + getTestSecurityGroup(az) + + selectedAvailabilitySetName1 := getASName(az, 1, availabilitySetCount) + selectedAvailabilitySetName2 := getASName(az, 2, availabilitySetCount) + for index := 1; index <= serviceCount; index++ { + svcName := fmt.Sprintf("service-%d", index) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } else { + svc = getTestService(svcName, v1.ProtocolTCP, 
8081) + } + lbMode := fmt.Sprintf("%s,%s", selectedAvailabilitySetName1, selectedAvailabilitySetName2) + setLoadBalancerModeAnnotation(&svc, lbMode) + + lbStatus, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + if lbStatus == nil { + t.Errorf("Unexpected error: %s", svcName) + } + + expectedNumOfLB := index % 2 + if index >= 2 { + expectedNumOfLB = 2 + } + result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) + lbCount := len(*result.Value) + if lbCount != expectedNumOfLB { + t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLB, lbCount) + } + } +} + +func testLoadBalancerMaxRulesServices(t *testing.T, isInternal bool) { + az := getTestCloud() + const vmCount = 1 + const availabilitySetCount = 1 + + clusterResources := getClusterResources(az, vmCount, availabilitySetCount) + getTestSecurityGroup(az) + + az.Config.MaximumLoadBalancerRuleCount = 1 + + for index := 1; index <= az.Config.MaximumLoadBalancerRuleCount; index++ { + svcName := fmt.Sprintf("service-%d", index) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } else { + svc = getTestService(svcName, v1.ProtocolTCP, 8081) + } + + lbStatus, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + if lbStatus == nil { + t.Errorf("Unexpected error: %s", svcName) + } + + expectedNumOfLB := index % az.Config.MaximumLoadBalancerRuleCount + if index >= az.Config.MaximumLoadBalancerRuleCount { + expectedNumOfLB = az.Config.MaximumLoadBalancerRuleCount + } + result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) + lbCount := len(*result.Value) + if lbCount != expectedNumOfLB { + t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLB, lbCount) + } + } + + // validate adding a new service fails since it will 
exceed the max limit on LB + svcName := fmt.Sprintf("service-%d", az.Config.MaximumLoadBalancerRuleCount+1) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } else { + svc = getTestService(svcName, v1.ProtocolTCP, 8081) + } + _, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) + if err == nil { + t.Errorf("Expect any new service to fail as max limit in lb has reached") + } +} + +// Validate even distribution of external services across load balances +// based on number of availability sets +func testLoadBalancerServiceAutoModeDeleteSelection(t *testing.T, isInternal bool) { + az := getTestCloud() + const vmCount = 8 + const availabilitySetCount = 4 + const serviceCount = 9 + + clusterResources := getClusterResources(az, vmCount, availabilitySetCount) + getTestSecurityGroup(az) + + for index := 1; index <= serviceCount; index++ { + svcName := fmt.Sprintf("service-%d", index) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } else { + svc = getTestService(svcName, v1.ProtocolTCP, 8081) + } + setLoadBalancerAutoModeAnnotation(&svc) + lbStatus, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + if lbStatus == nil { + t.Errorf("Unexpected error: %s", svcName) + } + } + + for index := serviceCount; index >= 1; index-- { + svcName := fmt.Sprintf("service-%d", index) + var svc v1.Service + if isInternal { + svc = getInternalTestService(svcName, 8081) + addTestSubnet(t, az, &svc) + } else { + svc = getTestService(svcName, v1.ProtocolTCP, 8081) + } + + setLoadBalancerAutoModeAnnotation(&svc) + + expectedNumOfLB := index % availabilitySetCount + if index >= availabilitySetCount { + expectedNumOfLB = availabilitySetCount + } + result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) + lbCount := len(*result.Value) + if 
lbCount != expectedNumOfLB { + t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLB, lbCount) + } + + err := az.EnsureLoadBalancerDeleted(testClusterName, &svc) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + } +} + // Test addition of a new service on an internal LB with a subnet. func TestReconcileLoadBalancerAddServiceOnInternalSubnet(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc := getInternalTestService("servicea", 80) - addTestSubnet(t, &svc) - configProperties := getTestInternalFipConfigurationProperties(to.StringPtr("TestSubnet")) - lb := getTestLoadBalancer() - nodes := []*v1.Node{} + addTestSubnet(t, az, &svc) - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") - } - // ensure we got a frontend ip configuration if len(*lb.FrontendIPConfigurations) != 1 { t.Error("Expected the loadbalancer to have a frontend ip configuration") @@ -96,46 +372,48 @@ func TestReconcileLoadBalancerAddServiceOnInternalSubnet(t *testing.T) { // Test addition of services on an internal LB using both default and explicit subnets. 
func TestReconcileLoadBalancerAddServicesOnMultipleSubnets(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc1 := getTestService("service1", v1.ProtocolTCP, 8081) svc2 := getInternalTestService("service2", 8081) - addTestSubnet(t, &svc2) - configProperties1 := getTestPublicFipConfigurationProperties() - configProperties2 := getTestInternalFipConfigurationProperties(to.StringPtr("TestSubnet")) - lb := getTestLoadBalancer() - nodes := []*v1.Node{} - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties1, testClusterName, &svc1, nodes) + // Internal and External service cannot reside on the same LB resource + addTestSubnet(t, az, &svc2) + + // svc1 is using LB without "-internal" suffix + lb, err := az.reconcileLoadBalancer(testClusterName, &svc1, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling svc1: %q", err) } - lb, updated, err = az.reconcileLoadBalancer(lb, &configProperties2, testClusterName, &svc2, nodes) + // ensure we got a frontend ip configuration for each service + if len(*lb.FrontendIPConfigurations) != 1 { + t.Error("Expected the loadbalancer to have 1 frontend ip configurations") + } + + validateLoadBalancer(t, lb, svc1) + + // svc2 is using LB with "-internal" suffix + lb, err = az.reconcileLoadBalancer(testClusterName, &svc2, nil, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling svc2: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") - } - // ensure we got a frontend ip configuration for each service - if len(*lb.FrontendIPConfigurations) != 2 { - t.Error("Expected the loadbalancer to have 2 frontend ip configurations") + if len(*lb.FrontendIPConfigurations) != 1 { + t.Error("Expected the loadbalancer to have 1 frontend ip configurations") } - validateLoadBalancer(t, lb, svc1, svc2) + validateLoadBalancer(t, lb, svc2) } // Test moving a service exposure from one subnet to another. 
func TestReconcileLoadBalancerEditServiceSubnet(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc := getInternalTestService("service1", 8081) - addTestSubnet(t, &svc) - configProperties := getTestInternalFipConfigurationProperties(to.StringPtr("TestSubnet")) - lb := getTestLoadBalancer() - nodes := []*v1.Node{} + addTestSubnet(t, az, &svc) - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling initial svc: %q", err) } @@ -143,17 +421,13 @@ func TestReconcileLoadBalancerEditServiceSubnet(t *testing.T) { validateLoadBalancer(t, lb, svc) svc.Annotations[ServiceAnnotationLoadBalancerInternalSubnet] = "NewSubnet" - configProperties = getTestInternalFipConfigurationProperties(to.StringPtr("NewSubnet")) + addTestSubnet(t, az, &svc) - lb, updated, err = az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err = az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling edits to svc: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") - } - // ensure we got a frontend ip configuration for the service if len(*lb.FrontendIPConfigurations) != 1 { t.Error("Expected the loadbalancer to have 1 frontend ip configuration") @@ -164,23 +438,16 @@ func TestReconcileLoadBalancerEditServiceSubnet(t *testing.T) { func TestReconcileLoadBalancerNodeHealth(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc := getTestService("servicea", v1.ProtocolTCP, 80) svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal svc.Spec.HealthCheckNodePort = int32(32456) - configProperties := getTestPublicFipConfigurationProperties() - lb := 
getTestLoadBalancer() - nodes := []*v1.Node{} - - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") - } - // ensure we got a frontend ip configuration if len(*lb.FrontendIPConfigurations) != 1 { t.Error("Expected the loadbalancer to have a frontend ip configuration") @@ -192,24 +459,17 @@ func TestReconcileLoadBalancerNodeHealth(t *testing.T) { // Test removing all services results in removing the frontend ip configuration func TestReconcileLoadBalancerRemoveService(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) - lb := getTestLoadBalancer() - configProperties := getTestPublicFipConfigurationProperties() - nodes := []*v1.Node{} - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) - if err != nil { - t.Errorf("Unexpected error: %q", err) - } - validateLoadBalancer(t, lb, svc) - - lb, updated, err = az.reconcileLoadBalancer(lb, nil, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") + lb, err = az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, false /* wantLb */) + if err != nil { + t.Errorf("Unexpected error: %q", err) } // ensure we abandoned the frontend ip configuration @@ -223,27 +483,21 @@ func TestReconcileLoadBalancerRemoveService(t *testing.T) { // Test removing all service ports results in removing the frontend ip configuration func TestReconcileLoadBalancerRemoveAllPortsRemovesFrontendConfig(t *testing.T) 
{ az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc := getTestService("servicea", v1.ProtocolTCP, 80) - lb := getTestLoadBalancer() - configProperties := getTestPublicFipConfigurationProperties() - nodes := []*v1.Node{} - lb, updated, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } validateLoadBalancer(t, lb, svc) svcUpdated := getTestService("servicea", v1.ProtocolTCP) - lb, updated, err = az.reconcileLoadBalancer(lb, nil, testClusterName, &svcUpdated, nodes) + lb, err = az.reconcileLoadBalancer(testClusterName, &svcUpdated, clusterResources.nodes, false /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } - if !updated { - t.Error("Expected the loadbalancer to need an update") - } - // ensure we abandoned the frontend ip configuration if len(*lb.FrontendIPConfigurations) != 0 { t.Error("Expected the loadbalancer to have no frontend ip configuration") @@ -255,37 +509,36 @@ func TestReconcileLoadBalancerRemoveAllPortsRemovesFrontendConfig(t *testing.T) // Test removal of a port from an existing service. 
func TestReconcileLoadBalancerRemovesPort(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) + svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) - configProperties := getTestPublicFipConfigurationProperties() - nodes := []*v1.Node{} - - existingLoadBalancer := getTestLoadBalancer(svc) - - svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80) - updatedLoadBalancer, _, err := az.reconcileLoadBalancer(existingLoadBalancer, &configProperties, testClusterName, &svcUpdated, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } - validateLoadBalancer(t, updatedLoadBalancer, svcUpdated) + svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80) + lb, err = az.reconcileLoadBalancer(testClusterName, &svcUpdated, clusterResources.nodes, true /* wantLb */) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + + validateLoadBalancer(t, lb, svcUpdated) } // Test reconciliation of multiple services on same port func TestReconcileLoadBalancerMultipleServices(t *testing.T) { az := getTestCloud() + clusterResources := getClusterResources(az, 1, 1) svc1 := getTestService("servicea", v1.ProtocolTCP, 80, 443) svc2 := getTestService("serviceb", v1.ProtocolTCP, 80) - configProperties := getTestPublicFipConfigurationProperties() - nodes := []*v1.Node{} - existingLoadBalancer := getTestLoadBalancer() - - updatedLoadBalancer, _, err := az.reconcileLoadBalancer(existingLoadBalancer, &configProperties, testClusterName, &svc1, nodes) + updatedLoadBalancer, err := az.reconcileLoadBalancer(testClusterName, &svc1, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } - updatedLoadBalancer, _, err = az.reconcileLoadBalancer(updatedLoadBalancer, &configProperties, testClusterName, &svc2, nodes) + updatedLoadBalancer, err = az.reconcileLoadBalancer(testClusterName, &svc2, 
clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -378,11 +631,13 @@ func TestServiceRespectsClientIPSessionAffinity(t *testing.T) { func TestReconcileSecurityGroupNewServiceAddsPort(t *testing.T) { az := getTestCloud() - svc1 := getTestService("serviceea", v1.ProtocolTCP, 80) + getTestSecurityGroup(az) + svc1 := getTestService("servicea", v1.ProtocolTCP, 80) + clusterResources := getClusterResources(az, 1, 1) + lb, _ := az.reconcileLoadBalancer(testClusterName, &svc1, clusterResources.nodes, true) + lbStatus, _ := az.getServiceLoadBalancerStatus(&svc1, lb) - sg := getTestSecurityGroup() - - sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc1, to.StringPtr("192.168.0.0"), true) + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, lbStatus, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -392,11 +647,14 @@ func TestReconcileSecurityGroupNewServiceAddsPort(t *testing.T) { func TestReconcileSecurityGroupNewInternalServiceAddsPort(t *testing.T) { az := getTestCloud() + getTestSecurityGroup(az) svc1 := getInternalTestService("serviceea", 80) + addTestSubnet(t, az, &svc1) + clusterResources := getClusterResources(az, 1, 1) - sg := getTestSecurityGroup() - - sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc1, to.StringPtr("192.168.0.0"), true) + lb, _ := az.reconcileLoadBalancer(testClusterName, &svc1, clusterResources.nodes, true) + lbStatus, _ := az.getServiceLoadBalancerStatus(&svc1, lb) + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, lbStatus, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -405,14 +663,20 @@ func TestReconcileSecurityGroupNewInternalServiceAddsPort(t *testing.T) { } func TestReconcileSecurityGroupRemoveService(t *testing.T) { + az := getTestCloud() service1 := getTestService("servicea", v1.ProtocolTCP, 81) service2 := getTestService("serviceb", v1.ProtocolTCP, 82) + clusterResources := 
getClusterResources(az, 1, 1) - sg := getTestSecurityGroup(service1, service2) + lb, _ := az.reconcileLoadBalancer(testClusterName, &service1, clusterResources.nodes, true) + az.reconcileLoadBalancer(testClusterName, &service2, clusterResources.nodes, true) + lbStatus, _ := az.getServiceLoadBalancerStatus(&service1, lb) + + sg := getTestSecurityGroup(az, service1, service2) validateSecurityGroup(t, sg, service1, service2) - az := getTestCloud() - sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &service1, nil, false) + + sg, err := az.reconcileSecurityGroup(testClusterName, &service1, lbStatus, false /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -423,11 +687,14 @@ func TestReconcileSecurityGroupRemoveService(t *testing.T) { func TestReconcileSecurityGroupRemoveServiceRemovesPort(t *testing.T) { az := getTestCloud() svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) + clusterResources := getClusterResources(az, 1, 1) - sg := getTestSecurityGroup(svc) - + sg := getTestSecurityGroup(az, svc) svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80) - sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svcUpdated, to.StringPtr("192.168.0.0"), true) + lb, _ := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true) + lbStatus, _ := az.getServiceLoadBalancerStatus(&svc, lb) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svcUpdated, lbStatus, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -442,9 +709,13 @@ func TestReconcileSecurityWithSourceRanges(t *testing.T) { "192.168.0.0/24", "10.0.0.0/32", } + clusterResources := getClusterResources(az, 1, 1) - sg := getTestSecurityGroup(svc) - sg, _, err := az.reconcileSecurityGroup(sg, testClusterName, &svc, to.StringPtr("192.168.0.0"), true) + sg := getTestSecurityGroup(az, svc) + lb, _ := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true) + lbStatus, _ := 
az.getServiceLoadBalancerStatus(&svc, lb) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc, lbStatus, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -452,19 +723,230 @@ func TestReconcileSecurityWithSourceRanges(t *testing.T) { validateSecurityGroup(t, sg, svc) } -func getTestCloud() *Cloud { - return &Cloud{ +func TestReconcilePublicIPWithNewService(t *testing.T) { + az := getTestCloud() + svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) + + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + validatePublicIP(t, pip, &svc, true) + + pip2, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB */) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + validatePublicIP(t, pip, &svc, true) + if pip.Name != pip2.Name || + pip.PublicIPAddressPropertiesFormat.IPAddress != pip2.PublicIPAddressPropertiesFormat.IPAddress { + t.Errorf("We should get the exact same public ip resource after a second reconcile") + } +} + +func TestReconcilePublicIPRemoveService(t *testing.T) { + az := getTestCloud() + svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) + + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + + validatePublicIP(t, pip, &svc, true) + + // Remove the service + pip, err = az.reconcilePublicIP(testClusterName, &svc, false /* wantLB */) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + validatePublicIP(t, pip, &svc, false) + +} + +func TestReconcilePublicIPWithInternalService(t *testing.T) { + az := getTestCloud() + svc := getInternalTestService("servicea", 80, 443) + + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + + validatePublicIP(t, pip, &svc, true) +} + +func 
TestReconcilePublicIPWithExternalAndInternalSwitch(t *testing.T) { + az := getTestCloud() + svc := getInternalTestService("servicea", 80, 443) + + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + validatePublicIP(t, pip, &svc, true) + + // Update to external service + svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80) + pip, err = az.reconcilePublicIP(testClusterName, &svcUpdated, true /* wantLB*/) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + validatePublicIP(t, pip, &svcUpdated, true) + + // Update to internal service again + pip, err = az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + validatePublicIP(t, pip, &svc, true) +} + +func getTestCloud() (az *Cloud) { + az = &Cloud{ Config: Config{ - TenantID: "tenant", - SubscriptionID: "subscription", - ResourceGroup: "rg", - Location: "westus", - VnetName: "vnet", - SubnetName: "subnet", - SecurityGroupName: "nsg", - RouteTableName: "rt", + TenantID: "tenant", + SubscriptionID: "subscription", + ResourceGroup: "rg", + VnetResourceGroup: "rg", + Location: "westus", + VnetName: "vnet", + SubnetName: "subnet", + SecurityGroupName: "nsg", + RouteTableName: "rt", + PrimaryAvailabilitySetName: "asName", + MaximumLoadBalancerRuleCount: 250, }, } + az.operationPollRateLimiter = flowcontrol.NewTokenBucketRateLimiter(100, 100) + az.LoadBalancerClient = NewFakeAzureLBClient() + az.PublicIPAddressesClient = NewFakeAzurePIPClient(az.Config.SubscriptionID) + az.SubnetsClient = NewFakeAzureSubnetsClient() + az.SecurityGroupsClient = NewFakeAzureNSGClient() + az.VirtualMachinesClient = NewFakeVirtualMachinesClient() + az.InterfacesClient = NewFakeInterfacesClient() + + return az +} + +const networkInterfacesIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/networkInterfaces/%s" +const primaryIPConfigIDTemplate = 
"%s/ipConfigurations/ipconfig" + +// returns the full identifier of a publicIPAddress. +func getNetworkInterfacesID(subscriptionID string, resourceGroupName, nicName string) string { + return fmt.Sprintf( + networkInterfacesIDTemplate, + subscriptionID, + resourceGroupName, + nicName) +} + +// returns the full identifier of a private ipconfig of the nic +func getPrimaryIPConfigID(nicID string) string { + return fmt.Sprintf( + primaryIPConfigIDTemplate, + nicID) +} + +const TestResourceNameFormat = "%s-%d" +const TestVMResourceBaseName = "vm" +const TestASResourceBaseName = "as" + +func getTestResourceName(resourceBaseName string, index int) string { + return fmt.Sprintf(TestResourceNameFormat, resourceBaseName, index) +} + +func getVMName(vmIndex int) string { + return getTestResourceName(TestVMResourceBaseName, vmIndex) +} + +func getASName(az *Cloud, vmIndex int, numAS int) string { + asIndex := vmIndex % numAS + if asIndex == 0 { + return az.Config.PrimaryAvailabilitySetName + } + + return getTestResourceName(TestASResourceBaseName, asIndex) +} + +func getNICName(vmIndex int) string { + // test supporting on 1 nic per vm + return getVMName(vmIndex) +} + +type ClusterResources struct { + nodes []*v1.Node + availabilitySetNames []string +} + +func getClusterResources(az *Cloud, vmCount int, availabilitySetCount int) (clusterResources *ClusterResources) { + if vmCount < availabilitySetCount { + return nil + } + clusterResources = &ClusterResources{} + clusterResources.nodes = []*v1.Node{} + clusterResources.availabilitySetNames = []string{} + for vmIndex := 0; vmIndex < vmCount; vmIndex++ { + vmName := getVMName(vmIndex) + asName := getASName(az, vmIndex, availabilitySetCount) + clusterResources.availabilitySetNames = append(clusterResources.availabilitySetNames, asName) + + nicName := getNICName(vmIndex) + nicID := getNetworkInterfacesID(az.Config.SubscriptionID, az.Config.ResourceGroup, nicName) + primaryIPConfigID := getPrimaryIPConfigID(nicID) + isPrimary := 
true + newNIC := network.Interface{ + ID: &nicID, + Name: &nicName, + InterfacePropertiesFormat: &network.InterfacePropertiesFormat{ + IPConfigurations: &[]network.InterfaceIPConfiguration{ + { + ID: &primaryIPConfigID, + InterfaceIPConfigurationPropertiesFormat: &network.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: &nicName, + Primary: &isPrimary, + }, + }, + }, + }, + } + az.InterfacesClient.CreateOrUpdate(az.Config.ResourceGroup, nicName, newNIC, nil) + + // create vm + asID := az.getAvailabilitySetID(asName) + newVM := compute.VirtualMachine{ + Name: &vmName, + Location: &az.Config.Location, + VirtualMachineProperties: &compute.VirtualMachineProperties{ + AvailabilitySet: &compute.SubResource{ + ID: &asID, + }, + NetworkProfile: &compute.NetworkProfile{ + NetworkInterfaces: &[]compute.NetworkInterfaceReference{ + { + ID: &nicID, + }, + }, + }, + }, + } + + _, errChan := az.VirtualMachinesClient.CreateOrUpdate(az.Config.ResourceGroup, vmName, newVM, nil) + if err := <-errChan; err != nil { + } + // add to kubernetes + newNode := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmName, + Labels: map[string]string{ + kubeletapis.LabelHostname: vmName, + }, + }, + } + clusterResources.nodes = append(clusterResources.nodes, newNode) + } + + return clusterResources } func getBackendPort(port int32) int32 { @@ -516,10 +998,17 @@ func getTestService(identifier string, proto v1.Protocol, requestedPorts ...int3 func getInternalTestService(identifier string, requestedPorts ...int32) v1.Service { svc := getTestService(identifier, v1.ProtocolTCP, requestedPorts...) 
svc.Annotations[ServiceAnnotationLoadBalancerInternal] = "true" - return svc } +func setLoadBalancerModeAnnotation(service *v1.Service, lbMode string) { + service.Annotations[ServiceAnnotationLoadBalancerMode] = lbMode +} + +func setLoadBalancerAutoModeAnnotation(service *v1.Service) { + setLoadBalancerModeAnnotation(service, ServiceAnnotationLoadBalancerAutoModeValue) +} + func getTestLoadBalancer(services ...v1.Service) network.LoadBalancer { rules := []network.LoadBalancingRule{} probes := []network.Probe{} @@ -563,7 +1052,7 @@ func getServiceSourceRanges(service *v1.Service) []string { return service.Spec.LoadBalancerSourceRanges } -func getTestSecurityGroup(services ...v1.Service) network.SecurityGroup { +func getTestSecurityGroup(az *Cloud, services ...v1.Service) *network.SecurityGroup { rules := []network.SecurityRule{} for _, service := range services { @@ -583,15 +1072,22 @@ func getTestSecurityGroup(services ...v1.Service) network.SecurityGroup { } sg := network.SecurityGroup{ + Name: &az.SecurityGroupName, SecurityGroupPropertiesFormat: &network.SecurityGroupPropertiesFormat{ SecurityRules: &rules, }, } - return sg + az.SecurityGroupsClient.CreateOrUpdate( + az.ResourceGroup, + az.SecurityGroupName, + sg, + nil) + + return &sg } -func validateLoadBalancer(t *testing.T, loadBalancer network.LoadBalancer, services ...v1.Service) { +func validateLoadBalancer(t *testing.T, loadBalancer *network.LoadBalancer, services ...v1.Service) { expectedRuleCount := 0 expectedFrontendIPCount := 0 expectedProbeCount := 0 @@ -718,7 +1214,34 @@ func describeFIPs(frontendIPs []network.FrontendIPConfiguration) string { return description } -func validateSecurityGroup(t *testing.T, securityGroup network.SecurityGroup, services ...v1.Service) { +func validatePublicIP(t *testing.T, publicIP *network.PublicIPAddress, service *v1.Service, wantLB bool) { + isInternal := requiresInternalLoadBalancer(service) + if isInternal || !wantLB { + if publicIP != nil { + t.Errorf("Expected 
publicIP resource to be nil, when it is an internal service or doesn't want LB") + } + return + } + + // For external service + if publicIP == nil { + t.Errorf("Expected publicIP resource exists, when it is not an internal service") + } + + if publicIP.Tags == nil || (*publicIP.Tags)["service"] == nil { + t.Errorf("Expected publicIP resource has tags[service]") + } + + serviceName := getServiceName(service) + if serviceName != *(*publicIP.Tags)["service"] { + t.Errorf("Expected publicIP resource has matching tags[service]") + } + // We cannot use service.Spec.LoadBalancerIP to compare with + // Public IP's IPAddress + // Becuase service properties are updated outside of cloudprovider code +} + +func validateSecurityGroup(t *testing.T, securityGroup *network.SecurityGroup, services ...v1.Service) { expectedRuleCount := 0 for _, svc := range services { for _, wantedRule := range svc.Spec.Ports { @@ -839,10 +1362,6 @@ func TestNewCloudFromJSON(t *testing.T) { "routeTableName": "--route-table-name--", "primaryAvailabilitySetName": "--primary-availability-set-name--", "cloudProviderBackoff": true, - "cloudProviderBackoffRetries": 6, - "cloudProviderBackoffExponent": 1.5, - "cloudProviderBackoffDuration": 5, - "cloudProviderBackoffJitter": 1.0, "cloudProviderRatelimit": true, "cloudProviderRateLimitQPS": 0.5, "cloudProviderRateLimitBucket": 5 @@ -1128,9 +1647,29 @@ func TestMetadataParsing(t *testing.T) { } } -func addTestSubnet(t *testing.T, svc *v1.Service) { +func addTestSubnet(t *testing.T, az *Cloud, svc *v1.Service) { if svc.Annotations[ServiceAnnotationLoadBalancerInternal] != "true" { t.Error("Subnet added to non-internal service") } - svc.Annotations[ServiceAnnotationLoadBalancerInternalSubnet] = "TestSubnet" + subName := svc.Annotations[ServiceAnnotationLoadBalancerInternalSubnet] + if subName == "" { + subName = az.SubnetName + } + + subnetID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s", + 
az.SubscriptionID, + az.VnetResourceGroup, + az.VnetName, + subName) + + _, errChan := az.SubnetsClient.CreateOrUpdate(az.VnetResourceGroup, az.VnetName, subName, + network.Subnet{ + ID: &subnetID, + Name: &subName, + }, nil) + + if err := <-errChan; err != nil { + t.Errorf("Subnet cannot be created or update, %v", err) + } + svc.Annotations[ServiceAnnotationLoadBalancerInternalSubnet] = subName } diff --git a/pkg/cloudprovider/providers/azure/azure_util.go b/pkg/cloudprovider/providers/azure/azure_util.go index bfd3e08bce9..3c98e4b08d8 100644 --- a/pkg/cloudprovider/providers/azure/azure_util.go +++ b/pkg/cloudprovider/providers/azure/azure_util.go @@ -20,7 +20,9 @@ import ( "errors" "fmt" "hash/crc32" + "math" "regexp" + "sort" "strconv" "strings" @@ -31,6 +33,7 @@ import ( "github.com/Azure/azure-sdk-for-go/arm/network" "github.com/golang/glog" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" ) const ( @@ -44,6 +47,12 @@ const ( loadBalancerRuleIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/loadBalancers/%s/loadBalancingRules/%s" loadBalancerProbeIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/loadBalancers/%s/probes/%s" securityRuleIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/networkSecurityGroups/%s/securityRules/%s" + + // InternalLoadBalancerNameSuffix is load balancer suffix + InternalLoadBalancerNameSuffix = "-internal" + + // nodeLabelRole specifies the role of a node + nodeLabelRole = "kubernetes.io/role" ) var providerIDRE = regexp.MustCompile(`^` + CloudProviderName + `://(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`) @@ -116,6 +125,197 @@ func (az *Cloud) getSecurityRuleID(securityRuleName string) string { securityRuleName) } +// returns the full identifier of a publicIPAddress. 
+func (az *Cloud) getpublicIPAddressID(pipName string) string { + return fmt.Sprintf( + publicIPAddressIDTemplate, + az.SubscriptionID, + az.ResourceGroup, + pipName) +} + +// select load balancer for the service in the cluster +func (az *Cloud) selectLoadBalancer(clusterName string, service *v1.Service, existingLBs *[]network.LoadBalancer, nodes []*v1.Node) (selectedLB *network.LoadBalancer, existsLb bool, err error) { + isInternal := requiresInternalLoadBalancer(service) + serviceName := getServiceName(service) + glog.V(3).Infof("selectLoadBalancer(%s): isInternal(%s) - start", serviceName, isInternal) + availabilitySetNames, err := az.getLoadBalancerAvailabilitySetNames(service, nodes) + if err != nil { + return nil, false, err + } + glog.Infof("selectLoadBalancer(%s): isInternal(%s) - availabilitysetsname %v", serviceName, isInternal, *availabilitySetNames) + mapExistingLBs := map[string]*network.LoadBalancer{} + for lbx := range *existingLBs { + lb := (*existingLBs)[lbx] + mapExistingLBs[*lb.Name] = &lb + } + selectedLBRuleCount := math.MaxInt32 + for asx := range *availabilitySetNames { + currASName := (*availabilitySetNames)[asx] + currLBName := az.getLoadBalancerName(clusterName, currASName, isInternal) + lb, ok := mapExistingLBs[currLBName] + if !ok { + // select this LB as this is a new LB and will have minimum rules + // create tmp lb struct to hold metadata for the new load-balancer + selectedLB = &network.LoadBalancer{ + Name: &currLBName, + Location: &az.Location, + LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{}, + } + + return selectedLB, false, nil + } + + lbRules := *lb.LoadBalancingRules + currLBRuleCount := 0 + if lbRules != nil { + currLBRuleCount = len(lbRules) + } + if currLBRuleCount < selectedLBRuleCount { + selectedLBRuleCount = currLBRuleCount + selectedLB = lb + } + } + + if selectedLB == nil { + glog.Errorf("selectLoadBalancer service (%s) - unable to find load balancer for selected availability sets %v", 
serviceName, *availabilitySetNames) + return nil, false, fmt.Errorf("selectLoadBalancer (%s)- unable to find load balancer for selected availability sets %v", serviceName, *availabilitySetNames) + } + // validate if the selected LB has not exceeded the MaximumLoadBalancerRuleCount + if az.Config.MaximumLoadBalancerRuleCount != 0 && selectedLBRuleCount >= az.Config.MaximumLoadBalancerRuleCount { + err = fmt.Errorf("selectLoadBalancer service (%s) - all available load balancers have exceeded maximum rule limit %d", serviceName, selectedLBRuleCount) + glog.Error(err) + return selectedLB, existsLb, err + } + + return selectedLB, existsLb, nil +} + +// getLoadBalancerAvailabilitySetNames selects all possible availability sets for +// service load balancer, if the service has no loadbalancer mode annotation returns the +// primary availability set if service annotation for loadbalancer availability set +// exists then return the eligible availability sets +func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes []*v1.Node) (availabilitySetNames *[]string, err error) { + hasMode, isAuto, serviceASL := getServiceLoadBalancerMode(service) + if !hasMode { + // legacy load balancer auto mode load balancer. 
+ availabilitySetNames = &[]string{az.Config.PrimaryAvailabilitySetName} + return availabilitySetNames, nil + } + availabilitySetNames, err = az.getAgentPoolAvailabiliySets(nodes) + if err != nil { + return nil, err + } + if len(*availabilitySetNames) == 0 { + return nil, fmt.Errorf("No availability sets found for nodes, node count(%d)", len(nodes)) + } + // sort the list to have deterministic selection + sort.Strings(*availabilitySetNames) + if !isAuto { + if serviceASL == nil || len(serviceASL) == 0 { + return nil, fmt.Errorf("service annotation for LoadBalancerMode is empty, it should have __auto__ or availability sets value") + } + // validate availability set exists + var found bool + for sasx := range serviceASL { + for asx := range *availabilitySetNames { + if strings.EqualFold((*availabilitySetNames)[asx], serviceASL[sasx]) { + found = true + serviceASL[sasx] = (*availabilitySetNames)[asx] + break + } + } + if !found { + return nil, fmt.Errorf("availability set (%s) - not found", serviceASL[sasx]) + } + } + availabilitySetNames = &serviceASL + } + + return availabilitySetNames, nil +} + +// lists the virtual machines for for the resource group and then builds +// a list of availability sets that match the nodes available to k8s +func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]string, err error) { + vms, err := az.VirtualMachineClientListWithRetry() + if err != nil { + return nil, err + } + vmNameToAvailabilitySetID := make(map[string]string, len(vms)) + for vmx := range vms { + vm := vms[vmx] + if vm.AvailabilitySet != nil { + vmNameToAvailabilitySetID[*vm.Name] = *vm.AvailabilitySet.ID + } + } + availabilitySetIDs := sets.NewString() + agentPoolAs = &[]string{} + for nx := range nodes { + nodeName := (*nodes[nx]).Name + if isMasterNode(nodes[nx]) { + continue + } + asID, ok := vmNameToAvailabilitySetID[nodeName] + if !ok { + return nil, fmt.Errorf("Node (%s) - has no availability sets", nodeName) + } + if 
availabilitySetIDs.Has(asID) { + // already added in the list + continue + } + asName, err := getLastSegment(asID) + if err != nil { + glog.Errorf("az.getNodeAvailabilitySet(%s), getLastSegment(%s), err=%v", nodeName, asID, err) + return nil, err + } + // AvailabilitySet ID is currently upper cased in a indeterministic way + // We want to keep it lower case, before the ID get fixed + asName = strings.ToLower(asName) + + *agentPoolAs = append(*agentPoolAs, asName) + } + + return agentPoolAs, nil +} + +func (az *Cloud) mapLoadBalancerNameToAvailabilitySet(lbName string, clusterName string) (availabilitySetName string) { + availabilitySetName = strings.TrimSuffix(lbName, InternalLoadBalancerNameSuffix) + if strings.EqualFold(clusterName, lbName) { + availabilitySetName = az.Config.PrimaryAvailabilitySetName + } + + return availabilitySetName +} + +// For a load balancer, all frontend ip should reference either a subnet or publicIpAddress. +// Thus Azure do not allow mixed type (public and internal) load balancer. +// So we'd have a separate name for internal load balancer. +// This would be the name for Azure LoadBalancer resource. +func (az *Cloud) getLoadBalancerName(clusterName string, availabilitySetName string, isInternal bool) string { + lbNamePrefix := availabilitySetName + if strings.EqualFold(availabilitySetName, az.Config.PrimaryAvailabilitySetName) { + lbNamePrefix = clusterName + } + if isInternal { + return fmt.Sprintf("%s%s", lbNamePrefix, InternalLoadBalancerNameSuffix) + } + return lbNamePrefix +} + +// isMasterNode returns returns true is the node has a master role label. +// The master role is determined by looking for: +// * a kubernetes.io/role="master" label +func isMasterNode(node *v1.Node) bool { + for k, v := range node.Labels { + if k == nodeLabelRole && v == "master" { + return true + } + } + + return false +} + // returns the deepest child's identifier from a full identifier string. 
func getLastSegment(ID string) (string, error) { parts := strings.Split(ID, "/") @@ -179,16 +379,8 @@ func getPrimaryIPConfig(nic network.Interface) (*network.InterfaceIPConfiguratio return nil, fmt.Errorf("failed to determine the determine primary ipconfig. nicname=%q", *nic.Name) } -// For a load balancer, all frontend ip should reference either a subnet or publicIpAddress. -// Thus Azure do not allow mixed type (public and internal) load balancer. -// So we'd have a separate name for internal load balancer. -// This would be the name for Azure LoadBalancer resource. -func getLoadBalancerName(clusterName string, isInternal bool) string { - if isInternal { - return fmt.Sprintf("%s-internal", clusterName) - } - - return clusterName +func isInternalLoadBalancer(lb *network.LoadBalancer) bool { + return strings.HasSuffix(*lb.Name, InternalLoadBalancerNameSuffix) } func getBackendPoolName(clusterName string) string { diff --git a/pkg/cloudprovider/providers/azure/azure_wrap.go b/pkg/cloudprovider/providers/azure/azure_wrap.go index e9c06dc6fc2..8bfa2ca81eb 100644 --- a/pkg/cloudprovider/providers/azure/azure_wrap.go +++ b/pkg/cloudprovider/providers/azure/azure_wrap.go @@ -40,6 +40,19 @@ func checkResourceExistsFromError(err error) (bool, error) { return false, v } +// If it is StatusNotFound return nil, +// Otherwise, return what it is +func ignoreStatusNotFoundFromError(err error) error { + if err == nil { + return nil + } + v, ok := err.(autorest.DetailedError) + if ok && v.StatusCode == http.StatusNotFound { + return nil + } + return err +} + func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualMachine, exists bool, err error) { var realErr error @@ -103,7 +116,6 @@ func (az *Cloud) getSecurityGroup() (sg network.SecurityGroup, exists bool, err func (az *Cloud) getAzureLoadBalancer(name string) (lb network.LoadBalancer, exists bool, err error) { var realErr error - az.operationPollRateLimiter.Accept() 
glog.V(10).Infof("LoadBalancerClient.Get(%s): start", name) lb, err = az.LoadBalancerClient.Get(az.ResourceGroup, name, "") @@ -121,6 +133,25 @@ func (az *Cloud) getAzureLoadBalancer(name string) (lb network.LoadBalancer, exi return lb, exists, err } +func (az *Cloud) listLoadBalancers() (lbListResult network.LoadBalancerListResult, exists bool, err error) { + var realErr error + + az.operationPollRateLimiter.Accept() + glog.V(10).Infof("LoadBalancerClient.List(%s): start", az.ResourceGroup) + lbListResult, err = az.LoadBalancerClient.List(az.ResourceGroup) + glog.V(10).Infof("LoadBalancerClient.List(%s): end", az.ResourceGroup) + exists, realErr = checkResourceExistsFromError(err) + if realErr != nil { + return lbListResult, false, realErr + } + + if !exists { + return lbListResult, false, nil + } + + return lbListResult, exists, err +} + func (az *Cloud) getPublicIPAddress(name string) (pip network.PublicIPAddress, exists bool, err error) { var realErr error From 443339da0ad3ca6f05c6a143fc1a2f37cba1080c Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Wed, 15 Nov 2017 09:41:13 -0800 Subject: [PATCH 02/18] fix documents, and correct typo --- .../providers/azure/azure_loadbalancer.go | 7 ++- .../providers/azure/azure_loadbalancer.md | 49 +++++++++++-------- .../providers/azure/azure_test.go | 20 ++++---- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 2afb6568303..54f6b7d08f3 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -62,7 +62,6 @@ func (az *Cloud) GetLoadBalancer(clusterName string, service *v1.Service) (statu glog.V(5).Infof("getloadbalancer (cluster:%s) (service:%s)- IP doesn't exist in any of the lbs", clusterName, serviceName) return nil, false, fmt.Errorf("Service(%s) - Loadbalancer not found", serviceName) } - return status, true, nil 
} @@ -320,17 +319,17 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName, domainNameLabel stri // This ensures load balancer exists and the frontend ip config is setup. // This also reconciles the Service's Ports with the LoadBalancer config. // This entails adding rules/probes for expected Ports and removing stale rules/ports. -// nodes only used if wantLB is true +// nodes only used if wantLb is true func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service, nodes []*v1.Node, wantLb bool) (*network.LoadBalancer, error) { isInternal := requiresInternalLoadBalancer(service) serviceName := getServiceName(service) - glog.V(2).Infof("reconcileLoadBalancer(%s) - wantLB(%t): started", serviceName, wantLb) + glog.V(2).Infof("reconcileLoadBalancer(%s) - wantLb(%t): started", serviceName, wantLb) lb, _, _, err := az.getServiceLoadBalancer(service, clusterName, nodes, wantLb) if err != nil { return nil, err } lbName := *lb.Name - glog.V(2).Infof("reconcileLoadBalancer(%s): lb(%s) wantLB(%t) resolved load balancer name", serviceName, lbName, wantLb) + glog.V(2).Infof("reconcileLoadBalancer(%s): lb(%s) wantLb(%t) resolved load balancer name", serviceName, lbName, wantLb) lbFrontendIPConfigName := getFrontendIPConfigName(service, subnet(service)) lbFrontendIPConfigID := az.getFrontendIPConfigID(lbName, lbFrontendIPConfigName) lbBackendPoolName := getBackendPoolName(clusterName) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md index 84a77a6784b..431056893fb 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md @@ -1,8 +1,10 @@ # Azure LoadBalancer -The way azure define LoadBalancer is different with GCE or AWS. Azure's LB can have multiple frontend IP refs. The GCE and AWS can only allow one, if you want more, you better to have another LB. 
Because of the fact, Public IP is not part of the LB in Azure. NSG is not part of LB in Azure as well. However, you cannot delete them in parallel, Public IP can only be delete after LB's frontend IP ref is removed. +The way Azure defines a LoadBalancer is different from GCE or AWS. Azure's LB can have multiple frontend IP refs, while GCE and AWS allow only one; if you want more, you need another LB. Because of this, the Public IP is not part of the LB in Azure. The NSG is not part of the LB in Azure either. However, you cannot delete them in parallel; the Public IP can only be deleted after the LB's frontend IP ref is removed. -For different Azure Resources, such as LB, Public IP, NSG. They are the same tier azure resourceS. We need to make sure there is no connection in their own ensure loops. In another words, They would be eventually reconciled regardless of other resources' state. They should only depends on service state. +Different Azure resources, such as LB, Public IP and NSG, are same-tier Azure resources. We need to make sure there is no coupling between their ensure loops; in other words, each would eventually be reconciled regardless of the other resources' state, and should depend only on the service state. + +Despite the ideal philosophy above, we have to face reality. The NSG depends on the LB's frontend IP to adjust NSG rules, so when we want to reconcile the NSG, the LB should already contain the corresponding frontend IP config. Also, for Azure we cannot afford more than 1 worker of service_controller, because different services could operate on the same LB and concurrent execution could result in conflicts or unexpected results. AWS and GCE apparently don't have this problem: they use one LB per service, so there is no such conflict. 
@@ -14,24 +16,25 @@ Service Annotation for Auto and specific load balancer mode ## Introduce Functions -- reconcileLoadBalancer(lb network.LoadBalancer, clusterName string, service *v1.Service, nodes []*v1.Node, wantLB bool) (network.LoadBalancer, error) - - Go through lb's properties, update based on wantLB +- reconcileLoadBalancer(clusterName string, service *v1.Service, nodes []*v1.Node, wantLb bool) (*network.LoadBalancer, error) + - Go through lb's properties, update based on wantLb - If any change on the lb, no matter if the lb exists or not - Call az cloud to CreateOrUpdate on this lb, or Delete if nothing left - return lb, err -- reconcileSecurityGroup(sg network.SecurityGroup, clusterName string, service *v1.Service, wantLb bool) (network.SecurityGroup, error) - - Go though NSG' properties, update based on wantLB +- reconcileSecurityGroup(clusterName string, service *v1.Service, lbStatus *v1.LoadBalancerStatus, wantLb bool) (*network.SecurityGroup, error) + - Go though NSG' properties, update based on wantLb - If any change on the NSG, (the NSG should always exists) - Call az cloud to CreateOrUpdate on this NSG - return sg, err -- reconcilePublicIP(pipName string, clusterName string, service *v1.Service, wantLB bool) (error) - - if wantLB and external LB, - - ensure Azure Public IP resource is there - - when we ensure Public IP, it needs to be both Name and Tag match with the convention - - remove dangling Public IP that could have Name or Tag match with the service, but not both - - else, ensure Azure Public IP resource is not there +- reconcilePublicIP(clusterName string, service *v1.Service, wantLb bool) (*network.PublicIPAddress, error) + - List all the public ip in the resource group + - Make sure we only touch Public IP resources has tags[service] = "namespace/serviceName" + - skip for wantLb && !isInternal && pipName == desiredPipName + - delete other public ip resources if any + - if !isInternal && wantLb + - ensure Public IP with desiredPipName 
exists - getServiceLoadBalancer(service *v1.Service, clusterName string, nodes []*v1.Node, wantLb bool) (lb, status, exists, error) - gets the loadbalancer for the service if it already exits @@ -44,15 +47,19 @@ Service Annotation for Auto and specific load balancer mode ### GetLoadBalancer - Get LoadBalancer status, return status, error - - If not exist, ensure it is there + - return the load balancer status for this service + - it will not create or update or delete any resource ### EnsureLoadBalancer -- Reconcile LB's related but not owned resources, such as Public IP, NSG rules - - Call reconcileSecurityGroup(sg, clusterName, service, true) - - Call reconcilePublicIP(pipName, cluster, service, true) +- Reconcile LB for the flipped service + - Call reconcileLoadBalancer(clusterName, flippedService, nil, false/* wantLb */) +- Reconcile Public IP + - Call reconcilePublicIP(cluster, service, true) - Reconcile LB's related and owned resources, such as FrontEndIPConfig, Rules, Probe. - - Call reconcileLoadBalancer(lb, clusterName, service, nodes, true) + - Call reconcileLoadBalancer(clusterName, service, nodes, true /* wantLb */) +- Reconcile NSG rules, it needs to be called after reconcileLB + - Call reconcileSecurityGroup(clusterName, service, lbStatus, true /* wantLb */) ### UpdateLoadBalancer @@ -61,8 +68,8 @@ Service Annotation for Auto and specific load balancer mode ### EnsureLoadBalancerDeleted - Reconcile NSG first, before reconcile LB, because SG need LB to be there - - Call reconcileSecurityGroup(sg, clusterName, service, false) + - Call reconcileSecurityGroup(clusterName, service, nil, false /* wantLb */) - Reconcile LB's related and owned resources, such as FrontEndIPConfig, Rules, Probe. 
- - Call reconcileLoadBalancer(lb, clusterName, service, nodes, false) -- Reconcile LB's related but not owned resources, such as Public IP - - Call reconcilePublicIP(pipName, cluster, service, false) \ No newline at end of file + - Call reconcileLoadBalancer(clusterName, service, nodes, false) +- Reconcile Public IP, public IP needs related LB reconciled first + - Call reconcilePublicIP(cluster, service, false) \ No newline at end of file diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 3bbdda0e7b3..521cde9bf37 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -727,13 +727,13 @@ func TestReconcilePublicIPWithNewService(t *testing.T) { az := getTestCloud() svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) - pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } validatePublicIP(t, pip, &svc, true) - pip2, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB */) + pip2, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -748,7 +748,7 @@ func TestReconcilePublicIPRemoveService(t *testing.T) { az := getTestCloud() svc := getTestService("servicea", v1.ProtocolTCP, 80, 443) - pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -756,7 +756,7 @@ func TestReconcilePublicIPRemoveService(t *testing.T) { validatePublicIP(t, pip, &svc, true) // Remove the service - pip, err = az.reconcilePublicIP(testClusterName, &svc, false /* wantLB */) + pip, err = az.reconcilePublicIP(testClusterName, &svc, false /* wantLb */) if err != nil { 
t.Errorf("Unexpected error: %q", err) } @@ -768,7 +768,7 @@ func TestReconcilePublicIPWithInternalService(t *testing.T) { az := getTestCloud() svc := getInternalTestService("servicea", 80, 443) - pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -780,7 +780,7 @@ func TestReconcilePublicIPWithExternalAndInternalSwitch(t *testing.T) { az := getTestCloud() svc := getInternalTestService("servicea", 80, 443) - pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + pip, err := az.reconcilePublicIP(testClusterName, &svc, true /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -788,14 +788,14 @@ func TestReconcilePublicIPWithExternalAndInternalSwitch(t *testing.T) { // Update to external service svcUpdated := getTestService("servicea", v1.ProtocolTCP, 80) - pip, err = az.reconcilePublicIP(testClusterName, &svcUpdated, true /* wantLB*/) + pip, err = az.reconcilePublicIP(testClusterName, &svcUpdated, true /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } validatePublicIP(t, pip, &svcUpdated, true) // Update to internal service again - pip, err = az.reconcilePublicIP(testClusterName, &svc, true /* wantLB*/) + pip, err = az.reconcilePublicIP(testClusterName, &svc, true /* wantLb*/) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -1214,9 +1214,9 @@ func describeFIPs(frontendIPs []network.FrontendIPConfiguration) string { return description } -func validatePublicIP(t *testing.T, publicIP *network.PublicIPAddress, service *v1.Service, wantLB bool) { +func validatePublicIP(t *testing.T, publicIP *network.PublicIPAddress, service *v1.Service, wantLb bool) { isInternal := requiresInternalLoadBalancer(service) - if isInternal || !wantLB { + if isInternal || !wantLb { if publicIP != nil { t.Errorf("Expected publicIP resource to be nil, when it is an internal 
service or doesn't want LB") } From 585dabc279c7b32e207f934ec0a072884e3d19ab Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Wed, 15 Nov 2017 10:26:33 -0800 Subject: [PATCH 03/18] rename azure interfaces to conform with golang convention --- pkg/cloudprovider/providers/azure/azure.go | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure.go b/pkg/cloudprovider/providers/azure/azure.go index a7cff34e774..dcff662f0f5 100644 --- a/pkg/cloudprovider/providers/azure/azure.go +++ b/pkg/cloudprovider/providers/azure/azure.go @@ -119,19 +119,22 @@ type Config struct { MaximumLoadBalancerRuleCount int `json:"maximumLoadBalancerRuleCount"` } -type iVirtualMachinesClient interface { +// VirtualMachinesClient defines needed functions for azure network.VirtualMachinesClient +type VirtualMachinesClient interface { CreateOrUpdate(resourceGroupName string, VMName string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error) Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error) List(resourceGroupName string) (result compute.VirtualMachineListResult, err error) ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error) } -type iInterfacesClient interface { +// InterfacesClient defines needed functions for azure network.InterfacesClient +type InterfacesClient interface { CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error) Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error) } -type iLoadBalancersClient interface { +// LoadBalancersClient defines needed functions for azure network.LoadBalancersClient +type LoadBalancersClient interface { 
CreateOrUpdate(resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer, cancel <-chan struct{}) (<-chan network.LoadBalancer, <-chan error) Delete(resourceGroupName string, loadBalancerName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) Get(resourceGroupName string, loadBalancerName string, expand string) (result network.LoadBalancer, err error) @@ -139,7 +142,8 @@ type iLoadBalancersClient interface { ListNextResults(lastResult network.LoadBalancerListResult) (result network.LoadBalancerListResult, err error) } -type iPublicIPAddressesClient interface { +// PublicIPAddressesClient defines needed functions for azure network.PublicIPAddressesClient +type PublicIPAddressesClient interface { CreateOrUpdate(resourceGroupName string, publicIPAddressName string, parameters network.PublicIPAddress, cancel <-chan struct{}) (<-chan network.PublicIPAddress, <-chan error) Delete(resourceGroupName string, publicIPAddressName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) Get(resourceGroupName string, publicIPAddressName string, expand string) (result network.PublicIPAddress, err error) @@ -147,14 +151,16 @@ type iPublicIPAddressesClient interface { ListNextResults(lastResults network.PublicIPAddressListResult) (result network.PublicIPAddressListResult, err error) } -type iSubnetsClient interface { +// SubnetsClient defines needed functions for azure network.SubnetsClient +type SubnetsClient interface { CreateOrUpdate(resourceGroupName string, virtualNetworkName string, subnetName string, subnetParameters network.Subnet, cancel <-chan struct{}) (<-chan network.Subnet, <-chan error) Delete(resourceGroupName string, virtualNetworkName string, subnetName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) Get(resourceGroupName string, virtualNetworkName string, subnetName string, expand string) (result network.Subnet, err error) List(resourceGroupName string, virtualNetworkName 
string) (result network.SubnetListResult, err error) } -type iSecurityGroupsClient interface { +// SecurityGroupsClient defines needed functions for azure network.SecurityGroupsClient +type SecurityGroupsClient interface { CreateOrUpdate(resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, cancel <-chan struct{}) (<-chan network.SecurityGroup, <-chan error) Delete(resourceGroupName string, networkSecurityGroupName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) Get(resourceGroupName string, networkSecurityGroupName string, expand string) (result network.SecurityGroup, err error) @@ -166,13 +172,13 @@ type Cloud struct { Config Environment azure.Environment RoutesClient network.RoutesClient - SubnetsClient iSubnetsClient - InterfacesClient iInterfacesClient + SubnetsClient SubnetsClient + InterfacesClient InterfacesClient RouteTablesClient network.RouteTablesClient - LoadBalancerClient iLoadBalancersClient - PublicIPAddressesClient iPublicIPAddressesClient - SecurityGroupsClient iSecurityGroupsClient - VirtualMachinesClient iVirtualMachinesClient + LoadBalancerClient LoadBalancersClient + PublicIPAddressesClient PublicIPAddressesClient + SecurityGroupsClient SecurityGroupsClient + VirtualMachinesClient VirtualMachinesClient StorageAccountClient storage.AccountsClient DisksClient disk.DisksClient operationPollRateLimiter flowcontrol.RateLimiter From 408f7396183b8d0af8eb791ea37257b302800817 Mon Sep 17 00:00:00 2001 From: NIkhil Bhatia Date: Wed, 15 Nov 2017 12:52:59 -0800 Subject: [PATCH 04/18] code-review- add logs and comments (#11) add logs and comments & fix getMasterNode --- .../providers/azure/azure_util.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_util.go b/pkg/cloudprovider/providers/azure/azure_util.go index 3c98e4b08d8..cdacf7568d4 100644 --- a/pkg/cloudprovider/providers/azure/azure_util.go +++ 
b/pkg/cloudprovider/providers/azure/azure_util.go @@ -135,12 +135,16 @@ func (az *Cloud) getpublicIPAddressID(pipName string) string { } // select load balancer for the service in the cluster +// the selection algorithm selects the load balancer which currently has +// the minimum lb rules; if there are multiple LBs with the same number of rules +// it selects the first one (sorted based on name) func (az *Cloud) selectLoadBalancer(clusterName string, service *v1.Service, existingLBs *[]network.LoadBalancer, nodes []*v1.Node) (selectedLB *network.LoadBalancer, existsLb bool, err error) { isInternal := requiresInternalLoadBalancer(service) serviceName := getServiceName(service) glog.V(3).Infof("selectLoadBalancer(%s): isInternal(%s) - start", serviceName, isInternal) availabilitySetNames, err := az.getLoadBalancerAvailabilitySetNames(service, nodes) if err != nil { + glog.Errorf("az.selectLoadBalancer: cluster (%s) service(%s) - az.getLoadBalancerAvailabilitySetNames failed, err=(%v)", clusterName, serviceName, err) return nil, false, err } glog.Infof("selectLoadBalancer(%s): isInternal(%s) - availabilitysetsname %v", serviceName, isInternal, *availabilitySetNames) @@ -198,15 +202,17 @@ func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes hasMode, isAuto, serviceASL := getServiceLoadBalancerMode(service) if !hasMode { - // legacy load balancer auto mode load balancer. 
+ // no mode specified in service annotation default to PrimaryAvailabilitySetName availabilitySetNames = &[]string{az.Config.PrimaryAvailabilitySetName} return availabilitySetNames, nil } availabilitySetNames, err = az.getAgentPoolAvailabiliySets(nodes) if err != nil { + glog.Errorf("az.getLoadBalancerAvailabilitySetNames - getAgentPoolAvailabiliySets failed err=(%v)", err) return nil, err } if len(*availabilitySetNames) == 0 { + glog.Errorf("az.getLoadBalancerAvailabilitySetNames - No availability sets found for nodes in the cluster, node count(%d)", len(nodes)) return nil, fmt.Errorf("No availability sets found for nodes, node count(%d)", len(nodes)) } // sort the list to have deterministic selection @@ -226,6 +232,7 @@ func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes } } if !found { + glog.Errorf("az.getLoadBalancerAvailabilitySetNames - Availability set (%s) in service annotation not found", serviceASL[sasx]) return nil, fmt.Errorf("availability set (%s) - not found", serviceASL[sasx]) } } @@ -240,6 +247,7 @@ func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]string, err error) { vms, err := az.VirtualMachineClientListWithRetry() if err != nil { + glog.Errorf("az.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err) return nil, err } vmNameToAvailabilitySetID := make(map[string]string, len(vms)) @@ -258,6 +266,7 @@ func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]s } asID, ok := vmNameToAvailabilitySetID[nodeName] if !ok { + glog.Errorf("az.getNodeAvailabilitySet - Node(%s) has no availability sets", nodeName) return nil, fmt.Errorf("Node (%s) - has no availability sets", nodeName) } if availabilitySetIDs.Has(asID) { @@ -266,7 +275,7 @@ func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]s } asName, err := getLastSegment(asID) if err != nil 
{ - glog.Errorf("az.getNodeAvailabilitySet(%s), getLastSegment(%s), err=%v", nodeName, asID, err) + glog.Errorf("az.getNodeAvailabilitySet - Node (%s)- getLastSegment(%s), err=%v", nodeName, asID, err) return nil, err } // AvailabilitySet ID is currently upper cased in a indeterministic way @@ -307,10 +316,8 @@ func (az *Cloud) getLoadBalancerName(clusterName string, availabilitySetName str // The master role is determined by looking for: // * a kubernetes.io/role="master" label func isMasterNode(node *v1.Node) bool { - for k, v := range node.Labels { - if k == nodeLabelRole && v == "master" { - return true - } + if val, ok := node.Labels[nodeLabelRole]; ok && val == "master" { + return true } return false From 69abfa676d91b54e1956051781010a0b749628f4 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Wed, 15 Nov 2017 17:34:09 -0800 Subject: [PATCH 05/18] naming, comment, typo correction --- .../providers/azure/azure_backoff.go | 44 +++++++++---------- .../providers/azure/azure_loadbalancer.go | 33 +++++--------- .../providers/azure/azure_loadbalancer.md | 4 +- .../providers/azure/azure_test.go | 17 ++++--- 4 files changed, 44 insertions(+), 54 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_backoff.go b/pkg/cloudprovider/providers/azure/azure_backoff.go index 32f3a5c0517..6988d3c4ed3 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff.go @@ -26,10 +26,10 @@ import ( "k8s.io/apimachinery/pkg/types" ) -// getorCreateRequestBackoff returns a new Backoff object steps = 1 +// getOrCreateRequestBackoff returns a new Backoff object steps = 1 // This is to make sure that the requested command executes // at least once -func (az *Cloud) getorCreateRequestBackoff() (resourceRequestBackoff wait.Backoff) { +func (az *Cloud) getOrCreateRequestBackoff() (resourceRequestBackoff wait.Backoff) { if az.CloudProviderBackoff { return az.resourceRequestBackoff } @@ -44,7 +44,7 @@ func (az *Cloud) 
getorCreateRequestBackoff() (resourceRequestBackoff wait.Backof func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.VirtualMachine, bool, error) { var machine compute.VirtualMachine var exists bool - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error machine, exists, retryErr = az.getVirtualMachine(name) if retryErr != nil { @@ -60,7 +60,7 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua // VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) { var machine compute.VirtualMachine - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types) @@ -78,7 +78,7 @@ func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, error) { allNodes := []compute.VirtualMachine{} var result compute.VirtualMachineListResult - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.List(%v): start", az.ResourceGroup) @@ -103,7 +103,7 @@ func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, appendResults = false // follow the next link to get all the vms for resource group if result.NextLink != nil { - err := 
wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.ListNextResults(%v): start", az.ResourceGroup) @@ -130,7 +130,7 @@ func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, // GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { var ip string - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error ip, retryErr = az.getIPForMachine(name) if retryErr != nil { @@ -145,7 +145,7 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): start", *sg.Name) respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil) @@ -158,7 +158,7 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { // CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { 
az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): start", *lb.Name) respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil) @@ -169,12 +169,12 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error { }) } -// ListLBWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry +// ListLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { allLBs := []network.LoadBalancer{} var result network.LoadBalancerListResult - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.List(%v): start", az.ResourceGroup) @@ -200,7 +200,7 @@ func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { // follow the next link to get all the vms for resource group if result.NextLink != nil { - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.ListNextResults(%v): start", az.ResourceGroup) @@ -229,7 +229,7 @@ func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { allPIPs := []network.PublicIPAddress{} var result network.PublicIPAddressListResult - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.List(%v): start", az.ResourceGroup) @@ -255,7 
+255,7 @@ func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { // follow the next link to get all the vms for resource group if result.NextLink != nil { - err := wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.ListNextResults(%v): start", az.ResourceGroup) @@ -282,7 +282,7 @@ func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { // CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s): start", *pip.Name) respChan, errChan := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil) @@ -295,7 +295,7 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error { // CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): start", *nic.Name) respChan, errChan := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil) @@ -308,7 +308,7 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error { // DeletePublicIPWithRetry invokes 
az.PublicIPAddressesClient.Delete with exponential backoff retry func (az *Cloud) DeletePublicIPWithRetry(pipName string) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.Delete(%s): start", pipName) respChan, errChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) @@ -321,7 +321,7 @@ func (az *Cloud) DeletePublicIPWithRetry(pipName string) error { // DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry func (az *Cloud) DeleteLBWithRetry(lbName string) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.Delete(%s): start", lbName) respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil) @@ -334,7 +334,7 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error { // CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%s): start", *routeTable.Name) respChan, errChan := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil) @@ -347,7 +347,7 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable // CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) 
CreateOrUpdateRouteWithRetry(route network.Route) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): start", *route.Name) respChan, errChan := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil) @@ -360,7 +360,7 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { // DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry func (az *Cloud) DeleteRouteWithRetry(routeName string) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RoutesClient.Delete(%s): start", az.RouteTableName) respChan, errChan := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil) @@ -373,7 +373,7 @@ func (az *Cloud) DeleteRouteWithRetry(routeName string) error { // CreateOrUpdateVMWithRetry invokes az.VirtualMachinesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualMachine) error { - return wait.ExponentialBackoff(az.getorCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s): start", vmName) respChan, errChan := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 54f6b7d08f3..9e52f4a5de0 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ 
b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -79,8 +79,8 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod // Here we'll firstly ensure service do not lie in the opposite LB. serviceName := getServiceName(service) glog.V(5).Infof("ensureloadbalancer(%s): START clusterName=%q", serviceName, clusterName) - flipedService := flipServiceInternalAnnotation(service) - if _, err := az.reconcileLoadBalancer(clusterName, flipedService, nil, false /* wantLb */); err != nil { + flippedService := flipServiceInternalAnnotation(service) + if _, err := az.reconcileLoadBalancer(clusterName, flippedService, nil, false /* wantLb */); err != nil { return nil, err } @@ -136,7 +136,7 @@ func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Servi return nil } -// getServiceLoadBalancer gets the loadbalancer for the service if it already exits +// getServiceLoadBalancer gets the loadbalancer for the service if it already exists // If wantLb is TRUE then -it selects a new load balancer // In case the selected load balancer does not exists it returns network.LoadBalancer struct // with added metadata (such as name, location) and existsLB set to FALSE @@ -258,9 +258,11 @@ func (az *Cloud) determinePublicIPName(clusterName string, service *v1.Service) func flipServiceInternalAnnotation(service *v1.Service) *v1.Service { copyService := service.DeepCopy() - if _, ok := copyService.Annotations[ServiceAnnotationLoadBalancerInternal]; ok { + if v, ok := copyService.Annotations[ServiceAnnotationLoadBalancerInternal]; ok && v == "true" { + // If it is internal now, we make it external by removing the annotation delete(copyService.Annotations, ServiceAnnotationLoadBalancerInternal) } else { + // If it is external now, we make it internal copyService.Annotations[ServiceAnnotationLoadBalancerInternal] = "true" } return copyService @@ -628,8 +630,8 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service, // If 
it is not exist, and no change to that, we don't CreateOrUpdate LB if dirtyLb { if lb.FrontendIPConfigurations == nil || len(*lb.FrontendIPConfigurations) == 0 { - // When FrontendIPConfigurations is empty, we need to delete the Azure LoadBalancer resource itself - // Because delete all FrontendIPConfigurations in LB is not supported, we have to delete the LB itself + // When FrontendIPConfigurations is empty, we need to delete the Azure load balancer resource itself, + // because an Azure load balancer cannot have an empty FrontendIPConfigurations collection glog.V(3).Infof("delete(%s): lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) az.operationPollRateLimiter.Accept() @@ -718,14 +720,6 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, return nil, err } - az.operationPollRateLimiter.Accept() - glog.V(10).Infof("SecurityGroupsClient.Get(%q): start", az.SecurityGroupName) - sg, err = az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "") - glog.V(10).Infof("SecurityGroupsClient.Get(%q): end", az.SecurityGroupName) - if err != nil { - return nil, err - } - destinationIPAddress := "" if wantLb { // Get lbIP since we make up NSG rules based on ingress IP @@ -846,7 +840,6 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, } // This reconciles the PublicIP resources similar to how the LB is reconciled. -// This entails adding required, missing SecurityRules and removing stale rules. 
func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, wantLb bool) (*network.PublicIPAddress, error) { isInternal := requiresInternalLoadBalancer(service) serviceName := getServiceName(service) @@ -869,9 +862,7 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, want if wantLb && !isInternal && pipName == desiredPipName { // This is the only case we should preserve the // Public ip resource with match service tag - // We could do nothing here, we will ensure that out of the loop } else { - // We use tag to decide which IP should be removed glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName) az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): start", pipName) @@ -899,12 +890,12 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, want if !isInternal && wantLb { // Confirm desired public ip resource exists - var rpip *network.PublicIPAddress + var pip *network.PublicIPAddress domainNameLabel := getPublicIPLabel(service) - if rpip, err = az.ensurePublicIPExists(serviceName, desiredPipName, domainNameLabel); err != nil { + if pip, err = az.ensurePublicIPExists(serviceName, desiredPipName, domainNameLabel); err != nil { return nil, err } - return rpip, nil + return pip, nil } return nil, nil } @@ -972,7 +963,7 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b expectedAvailabilitySetName := az.getAvailabilitySetID(availabilitySetName) if machine.AvailabilitySet == nil || !strings.EqualFold(*machine.AvailabilitySet.ID, expectedAvailabilitySetName) { glog.V(3).Infof( - "nicupdate(%s): skipping nic (%s) since it is not in the availabilitSet(%s)", + "nicupdate(%s): skipping nic (%s) since it is not in the availabilitySet(%s)", serviceName, nicName, availabilitySetName) return nil } diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md 
index 431056893fb..05a560b75b7 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md @@ -37,7 +37,7 @@ Service Annotation for Auto and specific load balancer mode - ensure Public IP with desiredPipName exists - getServiceLoadBalancer(service *v1.Service, clusterName string, nodes []*v1.Node, wantLb bool) (lb, status, exists, error) - - gets the loadbalancer for the service if it already exits + - gets the loadbalancer for the service if it already exists - If wantLb is TRUE then -it selects a new load balancer, the selction helps distribute the services across load balancers - In case the selected load balancer does not exists it returns network.LoadBalancer struct with added metadata (such as name, location) and existsLB set to FALSE - By default - cluster default LB is returned @@ -52,7 +52,7 @@ Service Annotation for Auto and specific load balancer mode ### EnsureLoadBalancer -- Reconcile LB for the fliped service +- Reconcile LB for the flipped service - Call reconcileLoadBalancer(clusterName, flipedService, nil, false/* wantLb */) - Reconcile Public IP - Call reconcilePublicIP(cluster, service, true) diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 521cde9bf37..8d6343d18f4 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -127,12 +127,12 @@ func testLoadBalancerServiceDefaultModeSelection(t *testing.T, isInternal bool) ruleCount := len(*lb.LoadBalancingRules) if ruleCount != index { - t.Errorf("lb rule could should be equal to nuber of services deployed, expected (%d) Found (%d)", index, ruleCount) + t.Errorf("lb rule count should be equal to number of services deployed, expected (%d) Found (%d)", index, ruleCount) } } } -// Validate even distribution of external services across load balances +// Validate even distribution of external services across 
load balancers // based on number of availability sets func testLoadBalancerServiceAutoModeSelection(t *testing.T, isInternal bool) { az := getTestCloud() @@ -173,8 +173,7 @@ func testLoadBalancerServiceAutoModeSelection(t *testing.T, isInternal bool) { maxRules := 0 minRules := serviceCount - for x := range *result.Value { - lb := (*result.Value)[x] + for _, lb := range *result.Value { ruleCount := len(*lb.LoadBalancingRules) if ruleCount < minRules { minRules = ruleCount @@ -737,7 +736,7 @@ func TestReconcilePublicIPWithNewService(t *testing.T) { if err != nil { t.Errorf("Unexpected error: %q", err) } - validatePublicIP(t, pip, &svc, true) + validatePublicIP(t, pip2, &svc, true) if pip.Name != pip2.Name || pip.PublicIPAddressPropertiesFormat.IPAddress != pip2.PublicIPAddressPropertiesFormat.IPAddress { t.Errorf("We should get the exact same public ip resource after a second reconcile") @@ -814,7 +813,7 @@ func getTestCloud() (az *Cloud) { SubnetName: "subnet", SecurityGroupName: "nsg", RouteTableName: "rt", - PrimaryAvailabilitySetName: "asName", + PrimaryAvailabilitySetName: "as", MaximumLoadBalancerRuleCount: 250, }, } @@ -832,8 +831,8 @@ func getTestCloud() (az *Cloud) { const networkInterfacesIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/networkInterfaces/%s" const primaryIPConfigIDTemplate = "%s/ipConfigurations/ipconfig" -// returns the full identifier of a publicIPAddress. -func getNetworkInterfacesID(subscriptionID string, resourceGroupName, nicName string) string { +// returns the full identifier of Network Interface. 
+func getNetworkInterfaceID(subscriptionID string, resourceGroupName, nicName string) string { return fmt.Sprintf( networkInterfacesIDTemplate, subscriptionID, @@ -892,7 +891,7 @@ func getClusterResources(az *Cloud, vmCount int, availabilitySetCount int) (clus clusterResources.availabilitySetNames = append(clusterResources.availabilitySetNames, asName) nicName := getNICName(vmIndex) - nicID := getNetworkInterfacesID(az.Config.SubscriptionID, az.Config.ResourceGroup, nicName) + nicID := getNetworkInterfaceID(az.Config.SubscriptionID, az.Config.ResourceGroup, nicName) primaryIPConfigID := getPrimaryIPConfigID(nicID) isPrimary := true newNIC := network.Interface{ From e8c65f713009b9b6429611d70021b7eed1489d6f Mon Sep 17 00:00:00 2001 From: NIkhil Bhatia Date: Thu, 16 Nov 2017 10:23:21 -0800 Subject: [PATCH 06/18] address more code review comments --- .../providers/azure/azure_backoff.go | 43 +++---- .../providers/azure/azure_loadbalancer.go | 116 +++++++++++++++--- .../providers/azure/azure_test.go | 52 ++++---- .../providers/azure/azure_util.go | 86 ++----------- 4 files changed, 161 insertions(+), 136 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_backoff.go b/pkg/cloudprovider/providers/azure/azure_backoff.go index 6988d3c4ed3..3947e912a39 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff.go @@ -26,10 +26,11 @@ import ( "k8s.io/apimachinery/pkg/types" ) -// getOrCreateRequestBackoff returns a new Backoff object steps = 1 +// requestBackoff if backoff is disabled in cloud provider it +// returns a new Backoff object steps = 1 // This is to make sure that the requested command executes // at least once -func (az *Cloud) getOrCreateRequestBackoff() (resourceRequestBackoff wait.Backoff) { +func (az *Cloud) requestBackoff() (resourceRequestBackoff wait.Backoff) { if az.CloudProviderBackoff { return az.resourceRequestBackoff } @@ -44,7 +45,7 @@ func (az *Cloud) 
getOrCreateRequestBackoff() (resourceRequestBackoff wait.Backof func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.VirtualMachine, bool, error) { var machine compute.VirtualMachine var exists bool - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error machine, exists, retryErr = az.getVirtualMachine(name) if retryErr != nil { @@ -60,7 +61,7 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua // VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) { var machine compute.VirtualMachine - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types) @@ -78,7 +79,7 @@ func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, error) { allNodes := []compute.VirtualMachine{} var result compute.VirtualMachineListResult - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.List(%v): start", az.ResourceGroup) @@ -103,7 +104,7 @@ func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, appendResults = false // follow the next link to get all the vms for resource group if result.NextLink != nil { - err := 
wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.ListNextResults(%v): start", az.ResourceGroup) @@ -130,7 +131,7 @@ func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, // GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { var ip string - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error ip, retryErr = az.getIPForMachine(name) if retryErr != nil { @@ -145,7 +146,7 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): start", *sg.Name) respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil) @@ -158,7 +159,7 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { // CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() 
glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): start", *lb.Name) respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil) @@ -174,7 +175,7 @@ func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { allLBs := []network.LoadBalancer{} var result network.LoadBalancerListResult - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.List(%v): start", az.ResourceGroup) @@ -200,7 +201,7 @@ func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { // follow the next link to get all the vms for resource group if result.NextLink != nil { - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.ListNextResults(%v): start", az.ResourceGroup) @@ -229,7 +230,7 @@ func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) { func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { allPIPs := []network.PublicIPAddress{} var result network.PublicIPAddressListResult - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.List(%v): start", az.ResourceGroup) @@ -255,7 +256,7 @@ func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { // follow the next link to get all the vms for resource group if result.NextLink != nil { - err := wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, 
error) { var retryErr error az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.ListNextResults(%v): start", az.ResourceGroup) @@ -282,7 +283,7 @@ func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) { // CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s): start", *pip.Name) respChan, errChan := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil) @@ -295,7 +296,7 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error { // CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): start", *nic.Name) respChan, errChan := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil) @@ -308,7 +309,7 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error { // DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry func (az *Cloud) DeletePublicIPWithRetry(pipName string) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() 
glog.V(10).Infof("PublicIPAddressesClient.Delete(%s): start", pipName) respChan, errChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) @@ -321,7 +322,7 @@ func (az *Cloud) DeletePublicIPWithRetry(pipName string) error { // DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry func (az *Cloud) DeleteLBWithRetry(lbName string) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.Delete(%s): start", lbName) respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil) @@ -334,7 +335,7 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error { // CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%s): start", *routeTable.Name) respChan, errChan := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil) @@ -347,7 +348,7 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable // CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): start", *route.Name) respChan, errChan := 
az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil) @@ -360,7 +361,7 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { // DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry func (az *Cloud) DeleteRouteWithRetry(routeName string) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("RoutesClient.Delete(%s): start", az.RouteTableName) respChan, errChan := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil) @@ -373,7 +374,7 @@ func (az *Cloud) DeleteRouteWithRetry(routeName string) error { // CreateOrUpdateVMWithRetry invokes az.VirtualMachinesClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualMachine) error { - return wait.ExponentialBackoff(az.getOrCreateRequestBackoff(), func() (bool, error) { + return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s): start", vmName) respChan, errChan := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 9e52f4a5de0..72ad6cfca63 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -18,11 +18,13 @@ package azure import ( "fmt" + "math" "strconv" "strings" "k8s.io/api/core/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" serviceapi "k8s.io/kubernetes/pkg/api/v1/service" "github.com/Azure/azure-sdk-for-go/arm/compute" @@ -41,6 +43,13 @@ const 
ServiceAnnotationLoadBalancerInternalSubnet = "service.beta.kubernetes.io/ // ServiceAnnotationLoadBalancerMode is the annotation used on the service to specify the // Azure load balancer selection based on availability sets +// There are currently three possible load balancer selection modes : +// 1. Default mode - service has no annotation ("service.beta.kubernetes.io/azure-load-balancer-mode") +// In this case the Loadbalancer of the primary Availability set is selected +// 2. "__auto__" mode - service is annotated with __auto__ value, this is when the load balancer from any availability set +// is selected which has the minimum rules associated with it. +// 3. "as1,as2" mode - this is when the load balancer from the specified availability sets is selected that has the +// minimum rules associated with it. const ServiceAnnotationLoadBalancerMode = "service.beta.kubernetes.io/azure-load-balancer-mode" // ServiceAnnotationLoadBalancerAutoModeValue the annotation used on the service to specify the @@ -146,20 +155,21 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string, var defaultLB *network.LoadBalancer defaultLBName := az.getLoadBalancerName(clusterName, az.Config.PrimaryAvailabilitySetName, isInternal) - lbs, err := az.ListLBWithRetry() + existingLBs, err := az.ListLBWithRetry() if err != nil { return nil, nil, false, err } - if lbs != nil { - for lbx := range lbs { - lb := &(lbs[lbx]) - if strings.EqualFold(*lb.Name, defaultLBName) { - defaultLB = lb + + // check if the service already has a load balancer + if existingLBs != nil { + for _, existingLB := range existingLBs { + if strings.EqualFold(*existingLB.Name, defaultLBName) { + defaultLB = &existingLB } - if isInternalLoadBalancer(lb) != isInternal { + if isInternalLoadBalancer(&existingLB) != isInternal { continue } - status, err = az.getServiceLoadBalancerStatus(service, lb) + status, err = az.getServiceLoadBalancerStatus(service, &existingLB) if err != nil { return nil, nil, 
false, err } @@ -168,19 +178,22 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string, continue } - return lb, status, true, nil + return &existingLB, status, true, nil } } + // service does not have a load balancer, select one if wantLb { // select new load balancer for service - lb, exists, err = az.selectLoadBalancer(clusterName, service, &lbs, nodes) + selectedLB, exists, err := az.selectLoadBalancer(clusterName, service, &existingLBs, nodes) if err != nil { return nil, nil, false, err } - return lb, nil, exists, err + return selectedLB, nil, exists, err } + + // create a default LB with meta data if not present if defaultLB == nil { defaultLB = &network.LoadBalancer{ Name: &defaultLBName, @@ -192,6 +205,66 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string, return defaultLB, nil, false, nil } +// select load balancer for the service in the cluster +// the selection algorithm selects the load balancer which currently has +// the minimum lb rules, if there are multiple LB's with same number of rules +// it selects the first one (sorted based on name) +func (az *Cloud) selectLoadBalancer(clusterName string, service *v1.Service, existingLBs *[]network.LoadBalancer, nodes []*v1.Node) (selectedLB *network.LoadBalancer, existsLb bool, err error) { + isInternal := requiresInternalLoadBalancer(service) + serviceName := getServiceName(service) + glog.V(3).Infof("selectLoadBalancer(%s): isInternal(%s) - start", serviceName, isInternal) + availabilitySetNames, err := az.getLoadBalancerAvailabilitySetNames(service, nodes) + if err != nil { + glog.Errorf("az.selectLoadBalancer: cluster(%s) service(%s) isInternal(%t) - az.getLoadBalancerAvailabilitySetNames failed, err=(%v)", clusterName, serviceName, isInternal, err) + return nil, false, err + } + glog.Infof("selectLoadBalancer: cluster(%s) service(%s) isInternal(%t) - availabilitysetsnames %v", clusterName, serviceName, isInternal, *availabilitySetNames) + 
mapExistingLBs := map[string]network.LoadBalancer{} + for _, lb := range *existingLBs { + mapExistingLBs[*lb.Name] = lb + } + selectedLBRuleCount := math.MaxInt32 + for _, currASName := range *availabilitySetNames { + currLBName := az.getLoadBalancerName(clusterName, currASName, isInternal) + lb, exists := mapExistingLBs[currLBName] + if !exists { + // select this LB as this is a new LB and will have minimum rules + // create tmp lb struct to hold metadata for the new load-balancer + selectedLB = &network.LoadBalancer{ + Name: &currLBName, + Location: &az.Location, + LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{}, + } + + return selectedLB, false, nil + } + + lbRules := *lb.LoadBalancingRules + currLBRuleCount := 0 + if lbRules != nil { + currLBRuleCount = len(lbRules) + } + if currLBRuleCount < selectedLBRuleCount { + selectedLBRuleCount = currLBRuleCount + selectedLB = &lb + } + } + + if selectedLB == nil { + err = fmt.Errorf("selectLoadBalancer: cluster(%s) service(%s) isInternal(%t) - unable to find load balancer for selected availability sets %v", clusterName, serviceName, isInternal, *availabilitySetNames) + glog.Error(err) + return nil, false, err + } + // validate if the selected LB has not exceeded the MaximumLoadBalancerRuleCount + if az.Config.MaximumLoadBalancerRuleCount != 0 && selectedLBRuleCount >= az.Config.MaximumLoadBalancerRuleCount { + err = fmt.Errorf("selectLoadBalancer: cluster(%s) service(%s) isInternal(%t) - all available load balancers have exceeded maximum rule limit %d, availabilitysetnames (%v)", clusterName, serviceName, isInternal, selectedLBRuleCount, *availabilitySetNames) + glog.Error(err) + return selectedLB, existsLb, err + } + + return selectedLB, existsLb, nil +} + func (az *Cloud) getServiceLoadBalancerStatus(service *v1.Service, lb *network.LoadBalancer) (status *v1.LoadBalancerStatus, err error) { if lb == nil { glog.V(10).Infof("getServiceLoadBalancerStatus lb is nil") @@ -1043,15 +1116,26 @@ func 
subnet(service *v1.Service) *string { return nil } -func getServiceLoadBalancerMode(service *v1.Service) (hasMode bool, isAuto bool, asl []string) { +// getServiceLoadBalancerMode parses the mode value +// if the value is __auto__ it returns isAuto = TRUE +// if anything else it returns the unique availability set names after trimming spaces +func getServiceLoadBalancerMode(service *v1.Service) (hasMode bool, isAuto bool, availabilitySetNames []string) { mode, hasMode := service.Annotations[ServiceAnnotationLoadBalancerMode] + mode = strings.TrimSpace(mode) isAuto = strings.EqualFold(mode, ServiceAnnotationLoadBalancerAutoModeValue) if !isAuto { - asTagList := strings.TrimSpace(mode) - // Break up list of "AS1,AS2" - asl = strings.Split(asTagList, ",") + availabilitySetParsedList := strings.Split(mode, ",") + + // Trim the availability set names and remove duplicates + // e.g. {"AS1"," AS2", "AS3", "AS3"} => {"AS1", "AS2", "AS3"} + availabilitySetNameSet := sets.NewString() + for _, v := range availabilitySetParsedList { + availabilitySetNameSet.Insert(strings.TrimSpace(v)) + } + + availabilitySetNames = availabilitySetNameSet.List() } - return hasMode, isAuto, asl + return hasMode, isAuto, availabilitySetNames } diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 8d6343d18f4..73af15642c2 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -19,6 +19,7 @@ package azure import ( "encoding/json" "fmt" + "math" "net/http" "net/http/httptest" "reflect" @@ -161,10 +162,8 @@ func testLoadBalancerServiceAutoModeSelection(t *testing.T, isInternal bool) { t.Errorf("Unexpected error: %s", svcName) } - expectedNumOfLB := index % availabilitySetCount - if index >= availabilitySetCount { - expectedNumOfLB = availabilitySetCount - } + // expected is MIN(index, availabilitySetCount) + expectedNumOfLB := int(math.Min(float64(index), 
float64(availabilitySetCount))) result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) lbCount := len(*result.Value) if lbCount != expectedNumOfLB { @@ -192,6 +191,9 @@ func testLoadBalancerServiceAutoModeSelection(t *testing.T, isInternal bool) { // Validate availability set selection of services across load balancers // based on provided availability sets through service annotation +// The scenario is that there are 4 availability sets in the agent pool but the +// services will be assigned load balancers that are part of the provided availability sets +// specified in service annotation func testLoadBalancerServicesSpecifiedSelection(t *testing.T, isInternal bool) { az := getTestCloud() const vmCount = 8 @@ -201,8 +203,8 @@ func testLoadBalancerServicesSpecifiedSelection(t *testing.T, isInternal bool) { clusterResources := getClusterResources(az, vmCount, availabilitySetCount) getTestSecurityGroup(az) - selectedAvailabilitySetName1 := getASName(az, 1, availabilitySetCount) - selectedAvailabilitySetName2 := getASName(az, 2, availabilitySetCount) + selectedAvailabilitySetName1 := getAvailabilitySetName(az, 1, availabilitySetCount) + selectedAvailabilitySetName2 := getAvailabilitySetName(az, 2, availabilitySetCount) for index := 1; index <= serviceCount; index++ { svcName := fmt.Sprintf("service-%d", index) var svc v1.Service @@ -223,10 +225,8 @@ func testLoadBalancerServicesSpecifiedSelection(t *testing.T, isInternal bool) { t.Errorf("Unexpected error: %s", svcName) } - expectedNumOfLB := index % 2 - if index >= 2 { - expectedNumOfLB = 2 - } + // expected is MIN(index, 2) + expectedNumOfLB := int(math.Min(float64(index), float64(2))) result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) lbCount := len(*result.Value) if lbCount != expectedNumOfLB { @@ -263,14 +263,12 @@ func testLoadBalancerMaxRulesServices(t *testing.T, isInternal bool) { t.Errorf("Unexpected error: %s", svcName) } - expectedNumOfLB := index % 
az.Config.MaximumLoadBalancerRuleCount - if index >= az.Config.MaximumLoadBalancerRuleCount { - expectedNumOfLB = az.Config.MaximumLoadBalancerRuleCount - } + // expected is MIN(index, az.Config.MaximumLoadBalancerRuleCount) + expectedNumOfLBRules := int(math.Min(float64(index), float64(az.Config.MaximumLoadBalancerRuleCount))) result, _ := az.LoadBalancerClient.List(az.Config.ResourceGroup) lbCount := len(*result.Value) - if lbCount != expectedNumOfLB { - t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLB, lbCount) + if lbCount != expectedNumOfLBRules { + t.Errorf("Unexpected number of LB's: Expected (%d) Found (%d)", expectedNumOfLBRules, lbCount) } } @@ -286,11 +284,15 @@ func testLoadBalancerMaxRulesServices(t *testing.T, isInternal bool) { _, err := az.EnsureLoadBalancer(testClusterName, &svc, clusterResources.nodes) if err == nil { t.Errorf("Expect any new service to fail as max limit in lb has reached") + } else { + expectedErrMessageSubString := "all available load balancers have exceeded maximum rule limit" + if !strings.Contains(err.Error(), expectedErrMessageSubString) { + t.Errorf("Error message returned is not expected, expected sub string=%s, actual error message=%v", expectedErrMessageSubString, err) + } } } -// Validate even distribution of external services across load balances -// based on number of availability sets +// Validate service deletion in lb auto selection mode func testLoadBalancerServiceAutoModeDeleteSelection(t *testing.T, isInternal bool) { az := getTestCloud() const vmCount = 8 @@ -331,10 +333,8 @@ func testLoadBalancerServiceAutoModeDeleteSelection(t *testing.T, isInternal boo setLoadBalancerAutoModeAnnotation(&svc) - expectedNumOfLB := index % availabilitySetCount - if index >= availabilitySetCount { - expectedNumOfLB = availabilitySetCount - } + // expected is MIN(index, availabilitySetCount) + expectedNumOfLB := int(math.Min(float64(index), float64(availabilitySetCount))) result, _ := 
az.LoadBalancerClient.List(az.Config.ResourceGroup) lbCount := len(*result.Value) if lbCount != expectedNumOfLB { @@ -859,7 +859,7 @@ func getVMName(vmIndex int) string { return getTestResourceName(TestVMResourceBaseName, vmIndex) } -func getASName(az *Cloud, vmIndex int, numAS int) string { +func getAvailabilitySetName(az *Cloud, vmIndex int, numAS int) string { asIndex := vmIndex % numAS if asIndex == 0 { return az.Config.PrimaryAvailabilitySetName @@ -868,8 +868,10 @@ func getASName(az *Cloud, vmIndex int, numAS int) string { return getTestResourceName(TestASResourceBaseName, asIndex) } +// test supporting on 1 nic per vm +// we really dont care about the name of the nic +// just using the vm name for testing purposes func getNICName(vmIndex int) string { - // test supporting on 1 nic per vm return getVMName(vmIndex) } @@ -887,7 +889,7 @@ func getClusterResources(az *Cloud, vmCount int, availabilitySetCount int) (clus clusterResources.availabilitySetNames = []string{} for vmIndex := 0; vmIndex < vmCount; vmIndex++ { vmName := getVMName(vmIndex) - asName := getASName(az, vmIndex, availabilitySetCount) + asName := getAvailabilitySetName(az, vmIndex, availabilitySetCount) clusterResources.availabilitySetNames = append(clusterResources.availabilitySetNames, asName) nicName := getNICName(vmIndex) diff --git a/pkg/cloudprovider/providers/azure/azure_util.go b/pkg/cloudprovider/providers/azure/azure_util.go index cdacf7568d4..04ff821e76a 100644 --- a/pkg/cloudprovider/providers/azure/azure_util.go +++ b/pkg/cloudprovider/providers/azure/azure_util.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "hash/crc32" - "math" "regexp" "sort" "strconv" @@ -134,73 +133,12 @@ func (az *Cloud) getpublicIPAddressID(pipName string) string { pipName) } -// select load balancer for the service in the cluster -// the selection algorithm selectes the the load balancer with currently has -// the minimum lb rules, there there are multiple LB's with same number of rules -// it selects the first 
one (sorted based on name) -func (az *Cloud) selectLoadBalancer(clusterName string, service *v1.Service, existingLBs *[]network.LoadBalancer, nodes []*v1.Node) (selectedLB *network.LoadBalancer, existsLb bool, err error) { - isInternal := requiresInternalLoadBalancer(service) - serviceName := getServiceName(service) - glog.V(3).Infof("selectLoadBalancer(%s): isInternal(%s) - start", serviceName, isInternal) - availabilitySetNames, err := az.getLoadBalancerAvailabilitySetNames(service, nodes) - if err != nil { - glog.Errorf("az.selectLoadBalancer: cluster (%s) service(%s) - az.getLoadBalancerAvailabilitySetNames failed, err=(%v)", clusterName, serviceName, err) - return nil, false, err - } - glog.Infof("selectLoadBalancer(%s): isInternal(%s) - availabilitysetsname %v", serviceName, isInternal, *availabilitySetNames) - mapExistingLBs := map[string]*network.LoadBalancer{} - for lbx := range *existingLBs { - lb := (*existingLBs)[lbx] - mapExistingLBs[*lb.Name] = &lb - } - selectedLBRuleCount := math.MaxInt32 - for asx := range *availabilitySetNames { - currASName := (*availabilitySetNames)[asx] - currLBName := az.getLoadBalancerName(clusterName, currASName, isInternal) - lb, ok := mapExistingLBs[currLBName] - if !ok { - // select this LB as this is a new LB and will have minimum rules - // create tmp lb struct to hold metadata for the new load-balancer - selectedLB = &network.LoadBalancer{ - Name: &currLBName, - Location: &az.Location, - LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{}, - } - - return selectedLB, false, nil - } - - lbRules := *lb.LoadBalancingRules - currLBRuleCount := 0 - if lbRules != nil { - currLBRuleCount = len(lbRules) - } - if currLBRuleCount < selectedLBRuleCount { - selectedLBRuleCount = currLBRuleCount - selectedLB = lb - } - } - - if selectedLB == nil { - glog.Errorf("selectLoadBalancer service (%s) - unable to find load balancer for selected availability sets %v", serviceName, *availabilitySetNames) - return nil, false, 
fmt.Errorf("selectLoadBalancer (%s)- unable to find load balancer for selected availability sets %v", serviceName, *availabilitySetNames) - } - // validate if the selected LB has not exceeded the MaximumLoadBalancerRuleCount - if az.Config.MaximumLoadBalancerRuleCount != 0 && selectedLBRuleCount >= az.Config.MaximumLoadBalancerRuleCount { - err = fmt.Errorf("selectLoadBalancer service (%s) - all available load balancers have exceeded maximum rule limit %d", serviceName, selectedLBRuleCount) - glog.Error(err) - return selectedLB, existsLb, err - } - - return selectedLB, existsLb, nil -} - // getLoadBalancerAvailabilitySetNames selects all possible availability sets for // service load balancer, if the service has no loadbalancer mode annotaion returns the // primary availability set if service annotation for loadbalancer availability set // exists then return the eligible a availability set func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes []*v1.Node) (availabilitySetNames *[]string, err error) { - hasMode, isAuto, serviceASL := getServiceLoadBalancerMode(service) + hasMode, isAuto, serviceAvailabilitySetNames := getServiceLoadBalancerMode(service) if !hasMode { // no mode specified in service annotation default to PrimaryAvailabilitySetName availabilitySetNames = &[]string{az.Config.PrimaryAvailabilitySetName} @@ -218,25 +156,25 @@ func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes // sort the list to have deterministic selection sort.Strings(*availabilitySetNames) if !isAuto { - if serviceASL == nil || len(serviceASL) == 0 { + if serviceAvailabilitySetNames == nil || len(serviceAvailabilitySetNames) == 0 { return nil, fmt.Errorf("service annotation for LoadBalancerMode is empty, it should have __auto__ or availability sets value") } // validate availability set exists var found bool - for sasx := range serviceASL { + for sasx := range serviceAvailabilitySetNames { for asx := range *availabilitySetNames { - 
if strings.EqualFold((*availabilitySetNames)[asx], serviceASL[sasx]) { + if strings.EqualFold((*availabilitySetNames)[asx], serviceAvailabilitySetNames[sasx]) { found = true - serviceASL[sasx] = (*availabilitySetNames)[asx] + serviceAvailabilitySetNames[sasx] = (*availabilitySetNames)[asx] break } } if !found { - glog.Errorf("az.getLoadBalancerAvailabilitySetNames - Availability set (%s) in service annotation not found", serviceASL[sasx]) - return nil, fmt.Errorf("availability set (%s) - not found", serviceASL[sasx]) + glog.Errorf("az.getLoadBalancerAvailabilitySetNames - Availability set (%s) in service annotation not found", serviceAvailabilitySetNames[sasx]) + return nil, fmt.Errorf("availability set (%s) - not found", serviceAvailabilitySetNames[sasx]) } } - availabilitySetNames = &serviceASL + availabilitySetNames = &serviceAvailabilitySetNames } return availabilitySetNames, nil @@ -244,7 +182,7 @@ func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes // lists the virtual machines for for the resource group and then builds // a list of availability sets that match the nodes available to k8s -func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]string, err error) { +func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) { vms, err := az.VirtualMachineClientListWithRetry() if err != nil { glog.Errorf("az.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err) @@ -258,7 +196,7 @@ func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]s } } availabilitySetIDs := sets.NewString() - agentPoolAs = &[]string{} + agentPoolAvailabilitySets = &[]string{} for nx := range nodes { nodeName := (*nodes[nx]).Name if isMasterNode(nodes[nx]) { @@ -282,10 +220,10 @@ func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAs *[]s // We want to keep it lower case, before the ID get fixed asName = 
strings.ToLower(asName) - *agentPoolAs = append(*agentPoolAs, asName) + *agentPoolAvailabilitySets = append(*agentPoolAvailabilitySets, asName) } - return agentPoolAs, nil + return agentPoolAvailabilitySets, nil } func (az *Cloud) mapLoadBalancerNameToAvailabilitySet(lbName string, clusterName string) (availabilitySetName string) { From 88aab6f67b6ed288db3d0785e7c06587bca9d438 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 10:55:36 -0800 Subject: [PATCH 07/18] fix azure bazel BUILD --- pkg/cloudprovider/providers/azure/BUILD | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/cloudprovider/providers/azure/BUILD b/pkg/cloudprovider/providers/azure/BUILD index c1d7bb6d735..acd41bdd717 100644 --- a/pkg/cloudprovider/providers/azure/BUILD +++ b/pkg/cloudprovider/providers/azure/BUILD @@ -49,6 +49,7 @@ go_library( "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library", ], @@ -61,10 +62,14 @@ go_test( library = ":go_default_library", deps = [ "//pkg/api/v1/service:go_default_library", + "//pkg/kubelet/apis:go_default_library", + "//vendor/github.com/Azure/azure-sdk-for-go/arm/compute:go_default_library", "//vendor/github.com/Azure/azure-sdk-for-go/arm/network:go_default_library", "//vendor/github.com/Azure/go-autorest/autorest/to:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", + "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library", ], ) From faec1d7f463bc5ed573454a5d5cf6617fbc0f1d1 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 11:18:18 -0800 Subject: 
[PATCH 08/18] for error case, return nil for SG --- pkg/cloudprovider/providers/azure/azure_loadbalancer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 72ad6cfca63..80b40050ebc 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -798,7 +798,7 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, // Get lbIP since we make up NSG rules based on ingress IP lbIP := &lbStatus.Ingress[0].IP if lbIP == nil { - return &sg, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name) + return nil, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name) } destinationIPAddress = *lbIP } From 6b36a70d7995b337d82b4ca938a1d5679f6e9ba2 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 11:25:51 -0800 Subject: [PATCH 09/18] fix fake name convention --- .../providers/azure/azure_fakes.go | 24 +++++++++---------- .../providers/azure/azure_test.go | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go index 0351f4efaa6..45e3eee904c 100644 --- a/pkg/cloudprovider/providers/azure/azure_fakes.go +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -264,20 +264,20 @@ func (fAPC fakeAzurePIPClient) List(resourceGroupName string) (result network.Pu return result, nil } -type fakeInterfacesClient struct { +type fakeAzureInterfacesClient struct { mutex *sync.Mutex FakeStore map[string]map[string]network.Interface } -func NewFakeInterfacesClient() fakeInterfacesClient { - fIC := fakeInterfacesClient{} +func NewFakeAzureInterfacesClient() fakeAzureInterfacesClient { + fIC := fakeAzureInterfacesClient{} fIC.FakeStore = 
make(map[string]map[string]network.Interface) fIC.mutex = &sync.Mutex{} return fIC } -func (fIC fakeInterfacesClient) CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error) { +func (fIC fakeAzureInterfacesClient) CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error) { fIC.mutex.Lock() defer fIC.mutex.Unlock() resultChan := make(chan network.Interface, 1) @@ -300,7 +300,7 @@ func (fIC fakeInterfacesClient) CreateOrUpdate(resourceGroupName string, network return resultChan, errChan } -func (fIC fakeInterfacesClient) Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error) { +func (fIC fakeAzureInterfacesClient) Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error) { fIC.mutex.Lock() defer fIC.mutex.Unlock() if _, ok := fIC.FakeStore[resourceGroupName]; ok { @@ -314,19 +314,19 @@ func (fIC fakeInterfacesClient) Get(resourceGroupName string, networkInterfaceNa } } -type fakeVirtualMachinesClient struct { +type fakeAzureVirtualMachinesClient struct { mutex *sync.Mutex FakeStore map[string]map[string]compute.VirtualMachine } -func NewFakeVirtualMachinesClient() fakeVirtualMachinesClient { - fVMC := fakeVirtualMachinesClient{} +func NewFakeAzureVirtualMachinesClient() fakeAzureVirtualMachinesClient { + fVMC := fakeAzureVirtualMachinesClient{} fVMC.FakeStore = make(map[string]map[string]compute.VirtualMachine) fVMC.mutex = &sync.Mutex{} return fVMC } -func (fVMC fakeVirtualMachinesClient) CreateOrUpdate(resourceGroupName string, VMName string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error) { +func (fVMC fakeAzureVirtualMachinesClient) CreateOrUpdate(resourceGroupName string, VMName 
string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error) { fVMC.mutex.Lock() defer fVMC.mutex.Unlock() resultChan := make(chan compute.VirtualMachine, 1) @@ -348,7 +348,7 @@ func (fVMC fakeVirtualMachinesClient) CreateOrUpdate(resourceGroupName string, V return resultChan, errChan } -func (fVMC fakeVirtualMachinesClient) Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error) { +func (fVMC fakeAzureVirtualMachinesClient) Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error) { fVMC.mutex.Lock() defer fVMC.mutex.Unlock() if _, ok := fVMC.FakeStore[resourceGroupName]; ok { @@ -362,7 +362,7 @@ func (fVMC fakeVirtualMachinesClient) Get(resourceGroupName string, VMName strin } } -func (fVMC fakeVirtualMachinesClient) List(resourceGroupName string) (result compute.VirtualMachineListResult, err error) { +func (fVMC fakeAzureVirtualMachinesClient) List(resourceGroupName string) (result compute.VirtualMachineListResult, err error) { fVMC.mutex.Lock() defer fVMC.mutex.Unlock() var value []compute.VirtualMachine @@ -378,7 +378,7 @@ func (fVMC fakeVirtualMachinesClient) List(resourceGroupName string) (result com result.Value = &value return result, nil } -func (fVMC fakeVirtualMachinesClient) ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error) { +func (fVMC fakeAzureVirtualMachinesClient) ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error) { fVMC.mutex.Lock() defer fVMC.mutex.Unlock() return compute.VirtualMachineListResult{}, nil diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 73af15642c2..51d9fa12a17 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ 
b/pkg/cloudprovider/providers/azure/azure_test.go @@ -822,8 +822,8 @@ func getTestCloud() (az *Cloud) { az.PublicIPAddressesClient = NewFakeAzurePIPClient(az.Config.SubscriptionID) az.SubnetsClient = NewFakeAzureSubnetsClient() az.SecurityGroupsClient = NewFakeAzureNSGClient() - az.VirtualMachinesClient = NewFakeVirtualMachinesClient() - az.InterfacesClient = NewFakeInterfacesClient() + az.VirtualMachinesClient = NewFakeAzureVirtualMachinesClient() + az.InterfacesClient = NewFakeAzureInterfacesClient() return az } From 83f18ca3f0bace5db472555e6c493c5f5500e868 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 11:33:48 -0800 Subject: [PATCH 10/18] refactor fake Delete function --- .../providers/azure/azure_fakes.go | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go index 45e3eee904c..755452ac723 100644 --- a/pkg/cloudprovider/providers/azure/azure_fakes.go +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -71,9 +71,9 @@ func (fLBC fakeAzureLBClient) Delete(resourceGroupName string, loadBalancerName close(respChan) close(errChan) }() - if _, ok := fLBC.FakeStore[resourceGroupName]; ok { - if _, ok := fLBC.FakeStore[resourceGroupName][loadBalancerName]; ok { - delete(fLBC.FakeStore[resourceGroupName], loadBalancerName) + if rgLBs, ok := fLBC.FakeStore[resourceGroupName]; ok { + if _, ok := rgLBs[loadBalancerName]; ok { + delete(rgLBs, loadBalancerName) resp.Response = &http.Response{ StatusCode: http.StatusAccepted, } @@ -207,9 +207,9 @@ func (fAPC fakeAzurePIPClient) Delete(resourceGroupName string, publicIPAddressN close(respChan) close(errChan) }() - if _, ok := fAPC.FakeStore[resourceGroupName]; ok { - if _, ok := fAPC.FakeStore[resourceGroupName][publicIPAddressName]; ok { - delete(fAPC.FakeStore[resourceGroupName], publicIPAddressName) + if rgPIPs, ok := fAPC.FakeStore[resourceGroupName]; ok { + if _, ok 
:= rgPIPs[publicIPAddressName]; ok { + delete(rgPIPs, publicIPAddressName) resp.Response = &http.Response{ StatusCode: http.StatusAccepted, } @@ -434,9 +434,9 @@ func (fASC fakeAzureSubnetsClient) Delete(resourceGroupName string, virtualNetwo }() rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND") - if _, ok := fASC.FakeStore[rgVnet]; ok { - if _, ok := fASC.FakeStore[rgVnet][subnetName]; ok { - delete(fASC.FakeStore[rgVnet], subnetName) + if rgSubnets, ok := fASC.FakeStore[rgVnet]; ok { + if _, ok := rgSubnets[subnetName]; ok { + delete(rgSubnets, subnetName) resp.Response = &http.Response{ StatusCode: http.StatusAccepted, } @@ -532,9 +532,9 @@ func (fNSG fakeAzureNSGClient) Delete(resourceGroupName string, networkSecurityG close(respChan) close(errChan) }() - if _, ok := fNSG.FakeStore[resourceGroupName]; ok { - if _, ok := fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]; ok { - delete(fNSG.FakeStore[resourceGroupName], networkSecurityGroupName) + if rgSGs, ok := fNSG.FakeStore[resourceGroupName]; ok { + if _, ok := rgSGs[networkSecurityGroupName]; ok { + delete(rgSGs, networkSecurityGroupName) resp.Response = &http.Response{ StatusCode: http.StatusAccepted, } From 1bf1c0d4d5811cd459b3d9a2a7e977ded8ee82c3 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 12:29:55 -0800 Subject: [PATCH 11/18] add azure_fakes.go Boilerplate header --- pkg/cloudprovider/providers/azure/azure_fakes.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go index 755452ac723..b5dbb1798bb 100644 --- a/pkg/cloudprovider/providers/azure/azure_fakes.go +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -1,3 +1,19 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package azure import ( From c3050e3ab4bf1a7b24ba7778b4436a893219d85c Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 13:23:45 -0800 Subject: [PATCH 12/18] make newFake* functions unexported --- pkg/cloudprovider/providers/azure/azure_fakes.go | 12 ++++++------ pkg/cloudprovider/providers/azure/azure_test.go | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go index b5dbb1798bb..c96c8cd869a 100644 --- a/pkg/cloudprovider/providers/azure/azure_fakes.go +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -36,7 +36,7 @@ type fakeAzureLBClient struct { FakeStore map[string]map[string]network.LoadBalancer } -func NewFakeAzureLBClient() fakeAzureLBClient { +func newFakeAzureLBClient() fakeAzureLBClient { fLBC := fakeAzureLBClient{} fLBC.FakeStore = make(map[string]map[string]network.LoadBalancer) fLBC.mutex = &sync.Mutex{} @@ -166,7 +166,7 @@ func getpublicIPAddressID(subscriptionID string, resourceGroupName, pipName stri pipName) } -func NewFakeAzurePIPClient(subscriptionID string) fakeAzurePIPClient { +func newFakeAzurePIPClient(subscriptionID string) fakeAzurePIPClient { fAPC := fakeAzurePIPClient{} fAPC.FakeStore = make(map[string]map[string]network.PublicIPAddress) fAPC.SubscriptionID = subscriptionID @@ -285,7 +285,7 @@ type fakeAzureInterfacesClient struct { FakeStore map[string]map[string]network.Interface } -func NewFakeAzureInterfacesClient() fakeAzureInterfacesClient { +func newFakeAzureInterfacesClient() 
fakeAzureInterfacesClient { fIC := fakeAzureInterfacesClient{} fIC.FakeStore = make(map[string]map[string]network.Interface) fIC.mutex = &sync.Mutex{} @@ -335,7 +335,7 @@ type fakeAzureVirtualMachinesClient struct { FakeStore map[string]map[string]compute.VirtualMachine } -func NewFakeAzureVirtualMachinesClient() fakeAzureVirtualMachinesClient { +func newFakeAzureVirtualMachinesClient() fakeAzureVirtualMachinesClient { fVMC := fakeAzureVirtualMachinesClient{} fVMC.FakeStore = make(map[string]map[string]compute.VirtualMachine) fVMC.mutex = &sync.Mutex{} @@ -405,7 +405,7 @@ type fakeAzureSubnetsClient struct { FakeStore map[string]map[string]network.Subnet } -func NewFakeAzureSubnetsClient() fakeAzureSubnetsClient { +func newFakeAzureSubnetsClient() fakeAzureSubnetsClient { fASC := fakeAzureSubnetsClient{} fASC.FakeStore = make(map[string]map[string]network.Subnet) fASC.mutex = &sync.Mutex{} @@ -506,7 +506,7 @@ type fakeAzureNSGClient struct { FakeStore map[string]map[string]network.SecurityGroup } -func NewFakeAzureNSGClient() fakeAzureNSGClient { +func newFakeAzureNSGClient() fakeAzureNSGClient { fNSG := fakeAzureNSGClient{} fNSG.FakeStore = make(map[string]map[string]network.SecurityGroup) fNSG.mutex = &sync.Mutex{} diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 51d9fa12a17..07279227f3d 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -818,12 +818,12 @@ func getTestCloud() (az *Cloud) { }, } az.operationPollRateLimiter = flowcontrol.NewTokenBucketRateLimiter(100, 100) - az.LoadBalancerClient = NewFakeAzureLBClient() - az.PublicIPAddressesClient = NewFakeAzurePIPClient(az.Config.SubscriptionID) - az.SubnetsClient = NewFakeAzureSubnetsClient() - az.SecurityGroupsClient = NewFakeAzureNSGClient() - az.VirtualMachinesClient = NewFakeAzureVirtualMachinesClient() - az.InterfacesClient = NewFakeAzureInterfacesClient() + 
az.LoadBalancerClient = newFakeAzureLBClient() + az.PublicIPAddressesClient = newFakeAzurePIPClient(az.Config.SubscriptionID) + az.SubnetsClient = newFakeAzureSubnetsClient() + az.SecurityGroupsClient = newFakeAzureNSGClient() + az.VirtualMachinesClient = newFakeAzureVirtualMachinesClient() + az.InterfacesClient = newFakeAzureInterfacesClient() return az } From ff961163aaa1994e2cf61d3d8c820ade2cf7c0b6 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 15:04:08 -0800 Subject: [PATCH 13/18] clean up retry logic, since we try at least once --- .../providers/azure/azure_fakes.go | 18 +++ .../providers/azure/azure_instances.go | 15 +-- .../providers/azure/azure_loadbalancer.go | 106 +++++------------- 3 files changed, 48 insertions(+), 91 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go index c96c8cd869a..862627c450a 100644 --- a/pkg/cloudprovider/providers/azure/azure_fakes.go +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -70,6 +70,9 @@ func (fLBC fakeAzureLBClient) CreateOrUpdate(resourceGroupName string, loadBalan } fLBC.FakeStore[resourceGroupName][loadBalancerName] = parameters result = fLBC.FakeStore[resourceGroupName][loadBalancerName] + result.Response.Response = &http.Response{ + StatusCode: http.StatusOK, + } err = nil return resultChan, errChan } @@ -206,6 +209,9 @@ func (fAPC fakeAzurePIPClient) CreateOrUpdate(resourceGroupName string, publicIP fAPC.FakeStore[resourceGroupName][publicIPAddressName] = parameters result = fAPC.FakeStore[resourceGroupName][publicIPAddressName] + result.Response.Response = &http.Response{ + StatusCode: http.StatusOK, + } err = nil return resultChan, errChan } @@ -311,6 +317,9 @@ func (fIC fakeAzureInterfacesClient) CreateOrUpdate(resourceGroupName string, ne } fIC.FakeStore[resourceGroupName][networkInterfaceName] = parameters result = fIC.FakeStore[resourceGroupName][networkInterfaceName] + result.Response.Response = 
&http.Response{ + StatusCode: http.StatusOK, + } err = nil return resultChan, errChan @@ -360,6 +369,9 @@ func (fVMC fakeAzureVirtualMachinesClient) CreateOrUpdate(resourceGroupName stri } fVMC.FakeStore[resourceGroupName][VMName] = parameters result = fVMC.FakeStore[resourceGroupName][VMName] + result.Response.Response = &http.Response{ + StatusCode: http.StatusOK, + } err = nil return resultChan, errChan } @@ -431,6 +443,9 @@ func (fASC fakeAzureSubnetsClient) CreateOrUpdate(resourceGroupName string, virt } fASC.FakeStore[rgVnet][subnetName] = subnetParameters result = fASC.FakeStore[rgVnet][subnetName] + result.Response.Response = &http.Response{ + StatusCode: http.StatusOK, + } err = nil return resultChan, errChan } @@ -531,6 +546,9 @@ func (fNSG fakeAzureNSGClient) CreateOrUpdate(resourceGroupName string, networkS } fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] = parameters result = fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] + result.Response.Response = &http.Response{ + StatusCode: http.StatusOK, + } err = nil return resultChan, errChan } diff --git a/pkg/cloudprovider/providers/azure/azure_instances.go b/pkg/cloudprovider/providers/azure/azure_instances.go index fe9ed07ae06..bde33ab323a 100644 --- a/pkg/cloudprovider/providers/azure/azure_instances.go +++ b/pkg/cloudprovider/providers/azure/azure_instances.go @@ -48,19 +48,10 @@ func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) { } return addresses, nil } - ip, err := az.getIPForMachine(name) + ip, err := az.GetIPForMachineWithRetry(name) if err != nil { - if az.CloudProviderBackoff { - glog.V(2).Infof("NodeAddresses(%s) backing off", name) - ip, err = az.GetIPForMachineWithRetry(name) - if err != nil { - glog.V(2).Infof("NodeAddresses(%s) abort backoff", name) - return nil, err - } - } else { - glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err) - return nil, err - } + glog.V(2).Infof("NodeAddresses(%s) abort backoff", 
name) + return nil, err } return []v1.NodeAddress{ diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 80b40050ebc..41d44e9a7b5 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -363,22 +363,13 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName, domainNameLabel stri pip.Tags = &map[string]*string{"service": &serviceName} glog.V(3).Infof("ensure(%s): pip(%s) - creating", serviceName, *pip.Name) az.operationPollRateLimiter.Accept() - glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%q): start", *pip.Name) - respChan, errChan := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil) - resp := <-respChan - err = <-errChan - glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%q): end", *pip.Name) - if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) { - glog.V(2).Infof("ensure(%s) backing off: pip(%s) - creating", serviceName, *pip.Name) - retryErr := az.CreateOrUpdatePIPWithRetry(pip) - if retryErr != nil { - glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name) - err = retryErr - } - } + glog.V(10).Infof("CreateOrUpdatePIPWithRetry(%q): start", *pip.Name) + err = az.CreateOrUpdatePIPWithRetry(pip) if err != nil { + glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name) return nil, err } + glog.V(10).Infof("CreateOrUpdatePIPWithRetry(%q): end", *pip.Name) az.operationPollRateLimiter.Accept() glog.V(10).Infof("PublicIPAddressesClient.Get(%q): start", *pip.Name) @@ -709,39 +700,17 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service, az.operationPollRateLimiter.Accept() glog.V(10).Infof("LoadBalancerClient.Delete(%q): start", lbName) - respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil) - resp := <-respChan - err := 
<-errChan - glog.V(10).Infof("LoadBalancerClient.Delete(%q): end", lbName) - if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) { - glog.V(2).Infof("delete(%s) backing off: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) - retryErr := az.DeleteLBWithRetry(lbName) - if retryErr != nil { - err = retryErr - glog.V(2).Infof("delete(%s) abort backoff: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) - } - } + err := az.DeleteLBWithRetry(lbName) if err != nil { + glog.V(2).Infof("delete(%s) abort backoff: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName) return nil, err } - + glog.V(10).Infof("LoadBalancerClient.Delete(%q): end", lbName) } else { glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName) - az.operationPollRateLimiter.Accept() - glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%q): start", lbName) - respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, lbName, *lb, nil) - resp := <-respChan - err := <-errChan - glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%q): end", lbName) - if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) { - glog.V(2).Infof("ensure(%s) backing off: lb(%s) - updating", serviceName, lbName) - retryErr := az.CreateOrUpdateLBWithRetry(*lb) - if retryErr != nil { - glog.V(2).Infof("ensure(%s) abort backoff: lb(%s) - updating", serviceName, lbName) - return nil, retryErr - } - } + err := az.CreateOrUpdateLBWithRetry(*lb) if err != nil { + glog.V(2).Infof("ensure(%s) abort backoff: lb(%s) - updating", serviceName, lbName) return nil, err } } @@ -892,22 +861,13 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, sg.SecurityRules = &updatedRules glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name) az.operationPollRateLimiter.Accept() - glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): start", *sg.Name) - respChan, errChan := 
az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil) - resp := <-respChan - err := <-errChan - glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): end", *sg.Name) - if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) { - glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name) - retryErr := az.CreateOrUpdateSGWithRetry(sg) - if retryErr != nil { - glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name) - return nil, retryErr - } - } + glog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): start", *sg.Name) + err := az.CreateOrUpdateSGWithRetry(sg) if err != nil { + glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name) return nil, err } + glog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): end", *sg.Name) } return &sg, nil } @@ -938,22 +898,18 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, want } else { glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName) az.operationPollRateLimiter.Accept() - glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): start", pipName) - resp, deleteErrChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) - deleteErr := <-deleteErrChan - glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): end", pipName) // response not read yet... 
- if az.CloudProviderBackoff && shouldRetryAPIRequest(<-resp, deleteErr) { - glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName) - retryErr := az.DeletePublicIPWithRetry(pipName) - if retryErr != nil { - glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName) - return nil, retryErr - } + glog.V(10).Infof("DeletePublicIPWithRetry(%q): start", pipName) + err = az.DeletePublicIPWithRetry(pipName) + if err != nil { + glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName) + // We let err to pass through + // It may be ignorable } + glog.V(10).Infof("DeletePublicIPWithRetry(%q): end", pipName) // response not read yet... - deleteErr = ignoreStatusNotFoundFromError(deleteErr) - if deleteErr != nil { - return nil, deleteErr + err = ignoreStatusNotFoundFromError(err) + if err != nil { + return nil, err } glog.V(2).Infof("ensure(%s): pip(%s) - finished", serviceName, pipName) } @@ -1007,20 +963,12 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b vmName := mapNodeNameToVMName(nodeName) az.operationPollRateLimiter.Accept() glog.V(10).Infof("VirtualMachinesClient.Get(%q): start", vmName) - machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "") - glog.V(10).Infof("VirtualMachinesClient.Get(%q): end", vmName) + machine, err := az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "") if err != nil { - if az.CloudProviderBackoff { - glog.V(2).Infof("ensureHostInPool(%s, %s, %s) backing off", serviceName, nodeName, backendPoolID) - machine, err = az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "") - if err != nil { - glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID) - return err - } - } else { - return err - } + glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID) + return err } + 
glog.V(10).Infof("VirtualMachinesClient.Get(%q): end", vmName) primaryNicID, err := getPrimaryInterfaceID(machine) if err != nil { From 1b9b3fd7c774576e460abe84fef980a60599a83e Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 16:09:08 -0800 Subject: [PATCH 14/18] assign random ip instead of hard code --- pkg/cloudprovider/providers/azure/azure_fakes.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_fakes.go b/pkg/cloudprovider/providers/azure/azure_fakes.go index 862627c450a..98f35b1c572 100644 --- a/pkg/cloudprovider/providers/azure/azure_fakes.go +++ b/pkg/cloudprovider/providers/azure/azure_fakes.go @@ -64,7 +64,9 @@ func (fLBC fakeAzureLBClient) CreateOrUpdate(resourceGroupName string, loadBalan if parameters.FrontendIPConfigurations != nil { for idx, config := range *parameters.FrontendIPConfigurations { if config.PrivateIPAllocationMethod == network.Dynamic { - (*parameters.FrontendIPConfigurations)[idx].PrivateIPAddress = to.StringPtr("10.0.0.19") + // Here we randomly assign an ip as private ip + // It dosen't smart enough to know whether it is in the subnet's range + (*parameters.FrontendIPConfigurations)[idx].PrivateIPAddress = getRandomIPPtr() } } } @@ -202,9 +204,7 @@ func (fAPC fakeAzurePIPClient) CreateOrUpdate(resourceGroupName string, publicIP if parameters.PublicIPAddressPropertiesFormat != nil && parameters.PublicIPAddressPropertiesFormat.PublicIPAllocationMethod == network.Static { // assign ip - rand.Seed(time.Now().UnixNano()) - randomIP := fmt.Sprintf("%d.%d.%d.%d", rand.Intn(256), rand.Intn(256), rand.Intn(256), rand.Intn(256)) - parameters.IPAddress = &randomIP + parameters.IPAddress = getRandomIPPtr() } fAPC.FakeStore[resourceGroupName][publicIPAddressName] = parameters @@ -616,3 +616,8 @@ func (fNSG fakeAzureNSGClient) List(resourceGroupName string) (result network.Se result.Value = &value return result, nil } + +func getRandomIPPtr() *string { + 
rand.Seed(time.Now().UnixNano()) + return to.StringPtr(fmt.Sprintf("%d.%d.%d.%d", rand.Intn(256), rand.Intn(256), rand.Intn(256), rand.Intn(256))) +} From 839e7f4c38ad3887add2eaca7fe54d504a36b395 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Thu, 16 Nov 2017 16:32:02 -0800 Subject: [PATCH 15/18] add test for flipServiceInternalAnnotation --- .../providers/azure/azure_test.go | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 07279227f3d..811cdec45cc 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -40,6 +40,30 @@ import ( var testClusterName = "testCluster" +// Test flipServiceInternalAnnotation +func TestFlipServiceInternalAnnotation(t *testing.T) { + svc := getTestService("servicea", v1.ProtocolTCP, 80) + svcUpdated := flipServiceInternalAnnotation(&svc) + if !requiresInternalLoadBalancer(svcUpdated) { + t.Errorf("Expected svc to be an internal service") + } + svcUpdated = flipServiceInternalAnnotation(svcUpdated) + if requiresInternalLoadBalancer(svcUpdated) { + t.Errorf("Expected svc to be an external service") + } + + svc2 := getInternalTestService("serviceb", 8081) + svc2Updated := flipServiceInternalAnnotation(&svc2) + if requiresInternalLoadBalancer(svc2Updated) { + t.Errorf("Expected svc to be an external service") + } + + svc2Updated = flipServiceInternalAnnotation(svc2Updated) + if !requiresInternalLoadBalancer(svc2Updated) { + t.Errorf("Expected svc to be an internal service") + } +} + // Test additional of a new service/port. 
func TestAddPort(t *testing.T) { az := getTestCloud() From 422dac5d9be0efc59f8918293a282eb8c72163ca Mon Sep 17 00:00:00 2001 From: itowlson Date: Fri, 17 Nov 2017 14:05:51 +1300 Subject: [PATCH 16/18] Option to consolidate Azure NSG rules for services (#13) * Option to consolidate Azure NSG rules for services * Fixed panic checking for service on other Azure LB --- .../providers/azure/azure_loadbalancer.go | 336 ++++++- .../providers/azure/azure_test.go | 903 +++++++++++++++++- .../providers/azure/azure_util.go | 4 + 3 files changed, 1198 insertions(+), 45 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 41d44e9a7b5..a4eee6e7d6e 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -59,6 +59,14 @@ const ServiceAnnotationLoadBalancerAutoModeValue = "__auto__" // ServiceAnnotationDNSLabelName annotation speficying the DNS label name for the service. const ServiceAnnotationDNSLabelName = "service.beta.kubernetes.io/azure-dns-label-name" +// ServiceAnnotationSharedSecurityRule is the annotation used on the service +// to specify that the service should be exposed using an Azure security rule +// that may be shared with other service, trading specificity of rules for an +// increase in the number of services that can be exposed. This relies on the +// Azure "augmented security rules" feature which at the time of writing is in +// preview and available only in certain regions. +const ServiceAnnotationSharedSecurityRule = "service.beta.kubernetes.io/azure-shared-securityrule" + // GetLoadBalancer returns whether the specified load balancer exists, and // if so, what its status is. 
func (az *Cloud) GetLoadBalancer(clusterName string, service *v1.Service) (status *v1.LoadBalancerStatus, exists bool, err error) { @@ -107,7 +115,12 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod return nil, err } - if _, err := az.reconcileSecurityGroup(clusterName, service, lbStatus, true /* wantLb */); err != nil { + var serviceIP *string + if lbStatus != nil && len(lbStatus.Ingress) > 0 { + serviceIP = &lbStatus.Ingress[0].IP + } + glog.V(10).Infof("Calling reconcileSecurityGroup from EnsureLoadBalancer for %s with IP %s, wantLb = true", service.Name, logSafe(serviceIP)) + if _, err := az.reconcileSecurityGroup(clusterName, service, serviceIP, true /* wantLb */); err != nil { return nil, err } @@ -127,9 +140,17 @@ func (az *Cloud) UpdateLoadBalancer(clusterName string, service *v1.Service, nod // have multiple underlying components, meaning a Get could say that the LB // doesn't exist even if some part of it is still laying around. func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Service) error { + isInternal := requiresInternalLoadBalancer(service) serviceName := getServiceName(service) glog.V(5).Infof("delete(%s): START clusterName=%q", serviceName, clusterName) - if _, err := az.reconcileSecurityGroup(clusterName, service, nil, false /* wantLb */); err != nil { + + serviceIPToCleanup, err := az.findServiceIPAddress(clusterName, service, isInternal) + if err != nil { + return err + } + + glog.V(10).Infof("Calling reconcileSecurityGroup from EnsureLoadBalancerDeleted for %s with IP %s, wantLb = false", service.Name, serviceIPToCleanup) + if _, err := az.reconcileSecurityGroup(clusterName, service, &serviceIPToCleanup, false /* wantLb */); err != nil { return err } @@ -331,6 +352,9 @@ func (az *Cloud) determinePublicIPName(clusterName string, service *v1.Service) func flipServiceInternalAnnotation(service *v1.Service) *v1.Service { copyService := service.DeepCopy() + if copyService.Annotations == 
nil { + copyService.Annotations = map[string]string{} + } if v, ok := copyService.Annotations[ServiceAnnotationLoadBalancerInternal]; ok && v == "true" { // If it is internal now, we make it external by remove the annotation delete(copyService.Annotations, ServiceAnnotationLoadBalancerInternal) @@ -341,6 +365,25 @@ func flipServiceInternalAnnotation(service *v1.Service) *v1.Service { return copyService } +func (az *Cloud) findServiceIPAddress(clusterName string, service *v1.Service, isInternalLb bool) (string, error) { + if len(service.Spec.LoadBalancerIP) > 0 { + return service.Spec.LoadBalancerIP, nil + } + + lbStatus, existsLb, err := az.GetLoadBalancer(clusterName, service) + if err != nil { + return "", err + } + if !existsLb { + return "", fmt.Errorf("Expected to find an IP address for service %s but did not", service.Name) + } + if len(lbStatus.Ingress) < 1 { + return "", fmt.Errorf("Expected to find an IP address for service %s but it had no ingresses", service.Name) + } + + return lbStatus.Ingress[0].IP, nil +} + func (az *Cloud) ensurePublicIPExists(serviceName, pipName, domainNameLabel string) (*network.PublicIPAddress, error) { pip, existsPip, err := az.getPublicIPAddress(pipName) if err != nil { @@ -744,16 +787,19 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service, // This reconciles the Network Security Group similar to how the LB is reconciled. // This entails adding required, missing SecurityRules and removing stale rules. 
-func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, lbStatus *v1.LoadBalancerStatus, wantLb bool) (*network.SecurityGroup, error) { +func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, lbIP *string, wantLb bool) (*network.SecurityGroup, error) { serviceName := getServiceName(service) - glog.V(5).Infof("ensure(%s): START clusterName=%q lbName=%q", serviceName, clusterName) + glog.V(5).Infof("reconcileSecurityGroup(%s): START clusterName=%q lbName=%q", serviceName, clusterName) - var ports []v1.ServicePort - if wantLb { - ports = service.Spec.Ports - } else { + ports := service.Spec.Ports + if ports == nil { + if useSharedSecurityRule(service) { + glog.V(2).Infof("Attempting to reconcile security group for service %s, but service uses shared rule and we don't know which port it's for", service.Name) + return nil, fmt.Errorf("No port info for reconciling shared rule for service %s", service.Name) + } ports = []v1.ServicePort{} } + az.operationPollRateLimiter.Accept() glog.V(10).Infof("SecurityGroupsClient.Get(%q): start", az.SecurityGroupName) sg, err := az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "") @@ -763,12 +809,10 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, } destinationIPAddress := "" - if wantLb { - // Get lbIP since we make up NSG rules based on ingress IP - lbIP := &lbStatus.Ingress[0].IP - if lbIP == nil { - return nil, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name) - } + if wantLb && lbIP == nil { + return nil, fmt.Errorf("No load balancer IP for setting up security rules for service %s", service.Name) + } + if lbIP != nil { destinationIPAddress = *lbIP } if destinationIPAddress == "" { @@ -789,38 +833,52 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, sourceAddressPrefixes = append(sourceAddressPrefixes, ip.String()) } } - expectedSecurityRules := 
make([]network.SecurityRule, len(ports)*len(sourceAddressPrefixes)) + expectedSecurityRules := []network.SecurityRule{} - for i, port := range ports { - _, securityProto, _, err := getProtocolsFromKubernetesProtocol(port.Protocol) - if err != nil { - return nil, err - } - for j := range sourceAddressPrefixes { - ix := i*len(sourceAddressPrefixes) + j - securityRuleName := getSecurityRuleName(service, port, sourceAddressPrefixes[j]) - expectedSecurityRules[ix] = network.SecurityRule{ - Name: to.StringPtr(securityRuleName), - SecurityRulePropertiesFormat: &network.SecurityRulePropertiesFormat{ - Protocol: *securityProto, - SourcePortRange: to.StringPtr("*"), - DestinationPortRange: to.StringPtr(strconv.Itoa(int(port.Port))), - SourceAddressPrefix: to.StringPtr(sourceAddressPrefixes[j]), - DestinationAddressPrefix: to.StringPtr(destinationIPAddress), - Access: network.SecurityRuleAccessAllow, - Direction: network.SecurityRuleDirectionInbound, - }, + if wantLb { + expectedSecurityRules = make([]network.SecurityRule, len(ports)*len(sourceAddressPrefixes)) + + for i, port := range ports { + _, securityProto, _, err := getProtocolsFromKubernetesProtocol(port.Protocol) + if err != nil { + return nil, err + } + for j := range sourceAddressPrefixes { + ix := i*len(sourceAddressPrefixes) + j + securityRuleName := getSecurityRuleName(service, port, sourceAddressPrefixes[j]) + expectedSecurityRules[ix] = network.SecurityRule{ + Name: to.StringPtr(securityRuleName), + SecurityRulePropertiesFormat: &network.SecurityRulePropertiesFormat{ + Protocol: *securityProto, + SourcePortRange: to.StringPtr("*"), + DestinationPortRange: to.StringPtr(strconv.Itoa(int(port.Port))), + SourceAddressPrefix: to.StringPtr(sourceAddressPrefixes[j]), + DestinationAddressPrefix: to.StringPtr(destinationIPAddress), + Access: network.SecurityRuleAccessAllow, + Direction: network.SecurityRuleDirectionInbound, + }, + } } } } + for _, r := range expectedSecurityRules { + glog.V(10).Infof("Expecting 
security rule for %s: %s:%s -> %s:%s", service.Name, *r.SourceAddressPrefix, *r.SourcePortRange, *r.DestinationAddressPrefix, *r.DestinationPortRange) + } + // update security rules dirtySg := false var updatedRules []network.SecurityRule if sg.SecurityRules != nil { updatedRules = *sg.SecurityRules } - // update security rules: remove unwanted + + for _, r := range updatedRules { + glog.V(10).Infof("Existing security rule while processing %s: %s:%s -> %s:%s", service.Name, logSafe(r.SourceAddressPrefix), logSafe(r.SourcePortRange), logSafeCollection(r.DestinationAddressPrefix, r.DestinationAddressPrefixes), logSafe(r.DestinationPortRange)) + } + + // update security rules: remove unwanted rules that belong privately + // to this service for i := len(updatedRules) - 1; i >= 0; i-- { existingRule := updatedRules[i] if serviceOwnsRule(service, *existingRule.Name) { @@ -837,6 +895,50 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, } } } + // update security rules: if the service uses a shared rule and is being deleted, + // then remove it from the shared rule + if useSharedSecurityRule(service) && !wantLb { + for _, port := range ports { + for _, sourceAddressPrefix := range sourceAddressPrefixes { + sharedRuleName := getSecurityRuleName(service, port, sourceAddressPrefix) + sharedIndex, sharedRule, sharedRuleFound := findSecurityRuleByName(updatedRules, sharedRuleName) + if !sharedRuleFound { + glog.V(4).Infof("Expected to find shared rule %s for service %s being deleted, but did not", sharedRuleName, service.Name) + return nil, fmt.Errorf("Expected to find shared rule %s for service %s being deleted, but did not", sharedRuleName, service.Name) + } + if sharedRule.DestinationAddressPrefixes == nil { + glog.V(4).Infof("Expected to have array of destinations in shared rule for service %s being deleted, but did not", service.Name) + return nil, fmt.Errorf("Expected to have array of destinations in shared rule for service %s being 
deleted, but did not", service.Name) + } + existingPrefixes := *sharedRule.DestinationAddressPrefixes + addressIndex, found := findIndex(existingPrefixes, destinationIPAddress) + if !found { + glog.V(4).Infof("Expected to find destination address %s in shared rule %s for service %s being deleted, but did not", destinationIPAddress, sharedRuleName, service.Name) + return nil, fmt.Errorf("Expected to find destination address %s in shared rule %s for service %s being deleted, but did not", destinationIPAddress, sharedRuleName, service.Name) + } + if len(existingPrefixes) == 1 { + updatedRules = append(updatedRules[:sharedIndex], updatedRules[sharedIndex+1:]...) + } else { + newDestinations := append(existingPrefixes[:addressIndex], existingPrefixes[addressIndex+1:]...) + sharedRule.DestinationAddressPrefixes = &newDestinations + updatedRules[sharedIndex] = sharedRule + } + dirtySg = true + } + } + } + + // update security rules: prepare rules for consolidation + for index, rule := range updatedRules { + if allowsConsolidation(rule) { + updatedRules[index] = makeConsolidatable(rule) + } + } + for index, rule := range expectedSecurityRules { + if allowsConsolidation(rule) { + expectedSecurityRules[index] = makeConsolidatable(rule) + } + } // update security rules: add needed for _, expectedRule := range expectedSecurityRules { foundRule := false @@ -844,6 +946,11 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, glog.V(10).Infof("reconcile(%s)(%t): sg rule(%s) - already exists", serviceName, wantLb, *expectedRule.Name) foundRule = true } + if foundRule && allowsConsolidation(expectedRule) { + index, _ := findConsolidationCandidate(updatedRules, expectedRule) + updatedRules[index] = consolidate(updatedRules[index], expectedRule) + dirtySg = true + } if !foundRule { glog.V(10).Infof("reconcile(%s)(%t): sg rule(%s) - adding", serviceName, wantLb, *expectedRule.Name) @@ -857,6 +964,11 @@ func (az *Cloud) reconcileSecurityGroup(clusterName 
string, service *v1.Service, dirtySg = true } } + + for _, r := range updatedRules { + glog.V(10).Infof("Updated security rule while processing %s: %s:%s -> %s:%s", service.Name, logSafe(r.SourceAddressPrefix), logSafe(r.SourcePortRange), logSafeCollection(r.DestinationAddressPrefix, r.DestinationAddressPrefixes), logSafe(r.DestinationPortRange)) + } + if dirtySg { sg.SecurityRules = &updatedRules glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name) @@ -865,6 +977,14 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, err := az.CreateOrUpdateSGWithRetry(sg) if err != nil { glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name) + // TODO (Nov 2017): remove when augmented security rules are out of preview + // we could try to parse the response but it's not worth it for bridging a preview + errorDescription := err.Error() + if strings.Contains(errorDescription, "SubscriptionNotRegisteredForFeature") && strings.Contains(errorDescription, "Microsoft.Network/AllowAccessRuleExtendedProperties") { + sharedRuleError := fmt.Errorf("Shared security rules are not available in this Azure region. 
Details: %v", errorDescription) + return nil, sharedRuleError + } + // END TODO return nil, err } glog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): end", *sg.Name) @@ -872,6 +992,144 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, return &sg, nil } +func logSafe(s *string) string { + if s == nil { + return "(nil)" + } + return *s +} + +func logSafeCollection(s *string, strs *[]string) string { + if s == nil { + if strs == nil { + return "(nil)" + } + return "[" + strings.Join(*strs, ",") + "]" + } + return *s +} + +func findSecurityRuleByName(rules []network.SecurityRule, ruleName string) (int, network.SecurityRule, bool) { + for index, rule := range rules { + if rule.Name != nil && strings.EqualFold(*rule.Name, ruleName) { + return index, rule, true + } + } + return 0, network.SecurityRule{}, false +} + +func findIndex(strs []string, s string) (int, bool) { + for index, str := range strs { + if strings.EqualFold(str, s) { + return index, true + } + } + return 0, false +} + +func allowsConsolidation(rule network.SecurityRule) bool { + return strings.HasPrefix(*rule.Name, "shared") +} + +func findConsolidationCandidate(rules []network.SecurityRule, rule network.SecurityRule) (int, bool) { + for index, r := range rules { + if allowsConsolidation(r) { + if strings.EqualFold(*r.Name, *rule.Name) { + return index, true + } + } + } + + return 0, false +} + +func makeConsolidatable(rule network.SecurityRule) network.SecurityRule { + return network.SecurityRule{ + Name: rule.Name, + SecurityRulePropertiesFormat: &network.SecurityRulePropertiesFormat{ + Priority: rule.Priority, + Protocol: rule.Protocol, + SourcePortRange: rule.SourcePortRange, + SourcePortRanges: rule.SourcePortRanges, + DestinationPortRange: rule.DestinationPortRange, + DestinationPortRanges: rule.DestinationPortRanges, + SourceAddressPrefix: rule.SourceAddressPrefix, + SourceAddressPrefixes: rule.SourceAddressPrefixes, + DestinationAddressPrefixes: 
collectionOrSingle(rule.DestinationAddressPrefixes, rule.DestinationAddressPrefix), + Access: rule.Access, + Direction: rule.Direction, + }, + } +} + +func consolidate(existingRule network.SecurityRule, newRule network.SecurityRule) network.SecurityRule { + destinations := appendElements(existingRule.SecurityRulePropertiesFormat.DestinationAddressPrefixes, newRule.DestinationAddressPrefix, newRule.DestinationAddressPrefixes) + destinations = deduplicate(destinations) // there are transient conditions during controller startup where it tries to add a service that is already added + + return network.SecurityRule{ + Name: existingRule.Name, + SecurityRulePropertiesFormat: &network.SecurityRulePropertiesFormat{ + Priority: existingRule.Priority, + Protocol: existingRule.Protocol, + SourcePortRange: existingRule.SourcePortRange, + SourcePortRanges: existingRule.SourcePortRanges, + DestinationPortRange: existingRule.DestinationPortRange, + DestinationPortRanges: existingRule.DestinationPortRanges, + SourceAddressPrefix: existingRule.SourceAddressPrefix, + SourceAddressPrefixes: existingRule.SourceAddressPrefixes, + DestinationAddressPrefixes: destinations, + Access: existingRule.Access, + Direction: existingRule.Direction, + }, + } +} + +func collectionOrSingle(collection *[]string, s *string) *[]string { + if collection != nil && len(*collection) > 0 { + return collection + } + if s == nil { + return &[]string{} + } + return &[]string{*s} +} + +func appendElements(collection *[]string, appendString *string, appendStrings *[]string) *[]string { + newCollection := []string{} + + if collection != nil { + newCollection = append(newCollection, *collection...) + } + if appendString != nil { + newCollection = append(newCollection, *appendString) + } + if appendStrings != nil { + newCollection = append(newCollection, *appendStrings...) 
+ } + + return &newCollection +} + +func deduplicate(collection *[]string) *[]string { + if collection == nil { + return nil + } + + seen := map[string]bool{} + result := make([]string, 0, len(*collection)) + + for _, v := range *collection { + if seen[v] == true { + // skip this element + } else { + seen[v] = true + result = append(result, v) + } + } + + return &result +} + // This reconciles the PublicIP resources similar to how the LB is reconciled. func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, wantLb bool) (*network.PublicIPAddress, error) { isInternal := requiresInternalLoadBalancer(service) @@ -1087,3 +1345,11 @@ func getServiceLoadBalancerMode(service *v1.Service) (hasMode bool, isAuto bool, return hasMode, isAuto, availabilitySetNames } + +func useSharedSecurityRule(service *v1.Service) bool { + if l, ok := service.Annotations[ServiceAnnotationSharedSecurityRule]; ok { + return l == "true" + } + + return false +} diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 811cdec45cc..db6fac8d3f7 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -660,7 +660,7 @@ func TestReconcileSecurityGroupNewServiceAddsPort(t *testing.T) { lb, _ := az.reconcileLoadBalancer(testClusterName, &svc1, clusterResources.nodes, true) lbStatus, _ := az.getServiceLoadBalancerStatus(&svc1, lb) - sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, lbStatus, true /* wantLb */) + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, &lbStatus.Ingress[0].IP, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -677,7 +677,7 @@ func TestReconcileSecurityGroupNewInternalServiceAddsPort(t *testing.T) { lb, _ := az.reconcileLoadBalancer(testClusterName, &svc1, clusterResources.nodes, true) lbStatus, _ := az.getServiceLoadBalancerStatus(&svc1, lb) - sg, err := 
az.reconcileSecurityGroup(testClusterName, &svc1, lbStatus, true /* wantLb */) + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, &lbStatus.Ingress[0].IP, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -699,7 +699,7 @@ func TestReconcileSecurityGroupRemoveService(t *testing.T) { sg := getTestSecurityGroup(az, service1, service2) validateSecurityGroup(t, sg, service1, service2) - sg, err := az.reconcileSecurityGroup(testClusterName, &service1, lbStatus, false /* wantLb */) + sg, err := az.reconcileSecurityGroup(testClusterName, &service1, &lbStatus.Ingress[0].IP, false /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -717,7 +717,7 @@ func TestReconcileSecurityGroupRemoveServiceRemovesPort(t *testing.T) { lb, _ := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true) lbStatus, _ := az.getServiceLoadBalancerStatus(&svc, lb) - sg, err := az.reconcileSecurityGroup(testClusterName, &svcUpdated, lbStatus, true /* wantLb */) + sg, err := az.reconcileSecurityGroup(testClusterName, &svcUpdated, &lbStatus.Ingress[0].IP, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -738,7 +738,7 @@ func TestReconcileSecurityWithSourceRanges(t *testing.T) { lb, _ := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true) lbStatus, _ := az.getServiceLoadBalancerStatus(&svc, lb) - sg, err := az.reconcileSecurityGroup(testClusterName, &svc, lbStatus, true /* wantLb */) + sg, err := az.reconcileSecurityGroup(testClusterName, &svc, &lbStatus.Ingress[0].IP, true /* wantLb */) if err != nil { t.Errorf("Unexpected error: %q", err) } @@ -1266,19 +1266,73 @@ func validatePublicIP(t *testing.T, publicIP *network.PublicIPAddress, service * // Becuase service properties are updated outside of cloudprovider code } +func contains(ruleValues []string, targetValue string) bool { + for _, ruleValue := range ruleValues { + if strings.EqualFold(ruleValue, targetValue) { 
+ return true + } + } + return false +} + +func securityRuleMatches(serviceSourceRange string, servicePort v1.ServicePort, serviceIP string, securityRule network.SecurityRule) error { + ruleSource := securityRule.SourceAddressPrefixes + if ruleSource == nil || len(*ruleSource) == 0 { + if securityRule.SourceAddressPrefix == nil { + ruleSource = &[]string{} + } else { + ruleSource = &[]string{*securityRule.SourceAddressPrefix} + } + } + + rulePorts := securityRule.DestinationPortRanges + if rulePorts == nil || len(*rulePorts) == 0 { + if securityRule.DestinationPortRange == nil { + rulePorts = &[]string{} + } else { + rulePorts = &[]string{*securityRule.DestinationPortRange} + } + } + + ruleDestination := securityRule.DestinationAddressPrefixes + if ruleDestination == nil || len(*ruleDestination) == 0 { + if securityRule.DestinationAddressPrefix == nil { + ruleDestination = &[]string{} + } else { + ruleDestination = &[]string{*securityRule.DestinationAddressPrefix} + } + } + + if !contains(*ruleSource, serviceSourceRange) { + return fmt.Errorf("Rule does not contain source %s", serviceSourceRange) + } + + if !contains(*rulePorts, fmt.Sprintf("%d", servicePort.Port)) { + return fmt.Errorf("Rule does not contain port %d", servicePort.Port) + } + + if serviceIP != "" && !contains(*ruleDestination, serviceIP) { + return fmt.Errorf("Rule does not contain destination %s", serviceIP) + } + + return nil +} + func validateSecurityGroup(t *testing.T, securityGroup *network.SecurityGroup, services ...v1.Service) { - expectedRuleCount := 0 + seenRules := make(map[string]string) for _, svc := range services { for _, wantedRule := range svc.Spec.Ports { sources := getServiceSourceRanges(&svc) for _, source := range sources { wantedRuleName := getSecurityRuleName(&svc, wantedRule, source) - expectedRuleCount++ + seenRules[wantedRuleName] = wantedRuleName foundRule := false for _, actualRule := range *securityGroup.SecurityRules { - if strings.EqualFold(*actualRule.Name, 
wantedRuleName) && - *actualRule.SourceAddressPrefix == source && - *actualRule.DestinationPortRange == fmt.Sprintf("%d", wantedRule.Port) { + if strings.EqualFold(*actualRule.Name, wantedRuleName) { + err := securityRuleMatches(source, wantedRule, svc.Spec.LoadBalancerIP, actualRule) + if err != nil { + t.Errorf("Found matching security rule %q but properties were incorrect: %v", wantedRuleName, err) + } foundRule = true break } @@ -1291,6 +1345,7 @@ func validateSecurityGroup(t *testing.T, securityGroup *network.SecurityGroup, s } lenRules := len(*securityGroup.SecurityRules) + expectedRuleCount := len(seenRules) if lenRules != expectedRuleCount { t.Errorf("Expected the loadbalancer to have %d rules. Found %d.\n", expectedRuleCount, lenRules) } @@ -1698,3 +1753,831 @@ func addTestSubnet(t *testing.T, az *Cloud, svc *v1.Service) { } svc.Annotations[ServiceAnnotationLoadBalancerInternalSubnet] = subName } + +func TestIfServiceSpecifiesSharedRuleAndRuleDoesNotExistItIsCreated(t *testing.T) { + az := getTestCloud() + svc := getTestService("servicesr", v1.ProtocolTCP, 80) + svc.Spec.LoadBalancerIP = "192.168.77.88" + svc.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc, to.StringPtr(svc.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + + validateSecurityGroup(t, sg, svc) + + expectedRuleName := "shared-TCP-80-Internet" + _, securityRule, ruleFound := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName) + if !ruleFound { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 80}, "192.168.77.88", securityRule) + if err != nil { + t.Errorf("Shared rule was not updated with new service IP: %v", err) + } + + if securityRule.Priority == nil { + t.Errorf("Shared rule %s had no priority", expectedRuleName) + } + + if 
securityRule.Access != network.SecurityRuleAccessAllow { + t.Errorf("Shared rule %s did not have Allow access", expectedRuleName) + } + + if securityRule.Direction != network.SecurityRuleDirectionInbound { + t.Errorf("Shared rule %s did not have Inbound direction", expectedRuleName) + } +} + +func TestIfServiceSpecifiesSharedRuleAndRuleExistsThenTheServicesPortAndAddressAreAdded(t *testing.T) { + az := getTestCloud() + svc := getTestService("servicesr", v1.ProtocolTCP, 80) + svc.Spec.LoadBalancerIP = "192.168.77.88" + svc.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName := "shared-TCP-80-Internet" + + sg := getTestSecurityGroup(az) + sg.SecurityRules = &[]network.SecurityRule{ + { + Name: &expectedRuleName, + SecurityRulePropertiesFormat: &network.SecurityRulePropertiesFormat{ + Protocol: network.SecurityRuleProtocolTCP, + SourcePortRange: to.StringPtr("*"), + SourceAddressPrefix: to.StringPtr("Internet"), + DestinationPortRange: to.StringPtr("80"), + DestinationAddressPrefix: to.StringPtr("192.168.33.44"), + Access: network.SecurityRuleAccessAllow, + Direction: network.SecurityRuleDirectionInbound, + }, + }, + } + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc, to.StringPtr(svc.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error: %q", err) + } + + validateSecurityGroup(t, sg, svc) + + _, securityRule, ruleFound := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName) + if !ruleFound { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName) + } + + expectedDestinationIPCount := 2 + if len(*securityRule.DestinationAddressPrefixes) != expectedDestinationIPCount { + t.Errorf("Shared rule should have had %d destination IP addresses but had %d", expectedDestinationIPCount, len(*securityRule.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 80}, "192.168.33.44", securityRule) + if err != nil { + t.Errorf("Shared rule no longer 
matched other service IP: %v", err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 80}, "192.168.77.88", securityRule) + if err != nil { + t.Errorf("Shared rule was not updated with new service IP: %v", err) + } +} + +func TestIfServicesSpecifySharedRuleButDifferentPortsThenSeparateRulesAreCreated(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 4444) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolTCP, 8888) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName1 := "shared-TCP-4444-Internet" + expectedRuleName2 := "shared-TCP-8888-Internet" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2) + + _, securityRule1, rule1Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName1) + if !rule1Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName1) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + expectedDestinationIPCount1 := 1 + if len(*securityRule1.DestinationAddressPrefixes) != expectedDestinationIPCount1 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName1, expectedDestinationIPCount1, len(*securityRule1.DestinationAddressPrefixes)) + } + + err = 
securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule1) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName1, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule1) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName1) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } +} + +func TestIfServicesSpecifySharedRuleButDifferentProtocolsThenSeparateRulesAreCreated(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 4444) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolUDP, 4444) + svc2.Spec.LoadBalancerIP = "192.168.77.88" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName1 := "shared-TCP-4444-Internet" + expectedRuleName2 := "shared-UDP-4444-Internet" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, 
to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2) + + _, securityRule1, rule1Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName1) + if !rule1Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName1) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + expectedDestinationIPCount1 := 1 + if len(*securityRule1.DestinationAddressPrefixes) != expectedDestinationIPCount1 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName1, expectedDestinationIPCount1, len(*securityRule1.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule1) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName1, err) + } + + if securityRule1.Protocol != network.SecurityRuleProtocolTCP { + t.Errorf("Shared rule %s should have been %s but was %s", expectedRuleName1, network.SecurityRuleProtocolTCP, securityRule1.Protocol) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + if securityRule2.Protocol != network.SecurityRuleProtocolUDP { + t.Errorf("Shared rule %s should have been %s but was %s", expectedRuleName2, network.SecurityRuleProtocolUDP, 
securityRule2.Protocol) + } +} + +func TestIfServicesSpecifySharedRuleButDifferentSourceAddressesThenSeparateRulesAreCreated(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 80) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Spec.LoadBalancerSourceRanges = []string{"192.168.12.0/24"} + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolTCP, 80) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Spec.LoadBalancerSourceRanges = []string{"192.168.34.0/24"} + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName1 := "shared-TCP-80-192.168.12.0_24" + expectedRuleName2 := "shared-TCP-80-192.168.34.0_24" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2) + + _, securityRule1, rule1Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName1) + if !rule1Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName1) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + expectedDestinationIPCount1 := 1 + if len(*securityRule1.DestinationAddressPrefixes) != expectedDestinationIPCount1 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName1, expectedDestinationIPCount1, len(*securityRule1.DestinationAddressPrefixes)) + } + + err = securityRuleMatches(svc1.Spec.LoadBalancerSourceRanges[0], 
v1.ServicePort{Port: 80}, "192.168.77.88", securityRule1) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName1, err) + } + + err = securityRuleMatches(svc2.Spec.LoadBalancerSourceRanges[0], v1.ServicePort{Port: 80}, "192.168.33.44", securityRule1) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName1) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches(svc2.Spec.LoadBalancerSourceRanges[0], v1.ServicePort{Port: 80}, "192.168.33.44", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + err = securityRuleMatches(svc1.Spec.LoadBalancerSourceRanges[0], v1.ServicePort{Port: 80}, "192.168.77.88", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } +} + +func TestIfServicesSpecifySharedRuleButSomeAreOnDifferentPortsThenRulesAreSeparatedOrConsoliatedByPort(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 4444) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolTCP, 8888) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc3 := getTestService("servicesr3", v1.ProtocolTCP, 4444) + svc3.Spec.LoadBalancerIP = "192.168.99.11" + svc3.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName13 := "shared-TCP-4444-Internet" + expectedRuleName2 := "shared-TCP-8888-Internet" + + sg := getTestSecurityGroup(az) + + sg, err := 
az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc3, to.StringPtr(svc3.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc3: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2, svc3) + + _, securityRule13, rule13Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName13) + if !rule13Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName13) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + expectedDestinationIPCount13 := 2 + if len(*securityRule13.DestinationAddressPrefixes) != expectedDestinationIPCount13 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName13, expectedDestinationIPCount13, len(*securityRule13.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule13) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName13, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.99.11", securityRule13) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName13, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule13) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName13) + } + + if securityRule13.Priority == nil { + t.Errorf("Shared rule 
%s had no priority", expectedRuleName13) + } + + if securityRule13.Access != network.SecurityRuleAccessAllow { + t.Errorf("Shared rule %s did not have Allow access", expectedRuleName13) + } + + if securityRule13.Direction != network.SecurityRuleDirectionInbound { + t.Errorf("Shared rule %s did not have Inbound direction", expectedRuleName13) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.99.11", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } +} + +func TestIfServiceSpecifiesSharedRuleAndServiceIsDeletedThenTheServicesPortAndAddressAreRemoved(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 80) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolTCP, 80) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName := "shared-TCP-80-Internet" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected 
error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2) + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), false) + if err != nil { + t.Errorf("Unexpected error removing svc1: %q", err) + } + + validateSecurityGroup(t, sg, svc2) + + _, securityRule, ruleFound := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName) + if !ruleFound { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName) + } + + expectedDestinationIPCount := 1 + if len(*securityRule.DestinationAddressPrefixes) != expectedDestinationIPCount { + t.Errorf("Shared rule should have had %d destination IP addresses but had %d", expectedDestinationIPCount, len(*securityRule.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 80}, "192.168.33.44", securityRule) + if err != nil { + t.Errorf("Shared rule no longer matched other service IP: %v", err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 80}, "192.168.77.88", securityRule) + if err == nil { + t.Error("Shared rule was not updated to remove deleted service IP") + } +} + +func TestIfSomeServicesShareARuleAndOneIsDeletedItIsRemovedFromTheRightRule(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 4444) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolTCP, 8888) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc3 := getTestService("servicesr3", v1.ProtocolTCP, 4444) + svc3.Spec.LoadBalancerIP = "192.168.99.11" + svc3.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + 
expectedRuleName13 := "shared-TCP-4444-Internet" + expectedRuleName2 := "shared-TCP-8888-Internet" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc3, to.StringPtr(svc3.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc3: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2, svc3) + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), false) + if err != nil { + t.Errorf("Unexpected error removing svc1: %q", err) + } + + validateSecurityGroup(t, sg, svc2, svc3) + + _, securityRule13, rule13Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName13) + if !rule13Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName13) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + expectedDestinationIPCount13 := 1 + if len(*securityRule13.DestinationAddressPrefixes) != expectedDestinationIPCount13 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName13, expectedDestinationIPCount13, len(*securityRule13.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule13) + if err == nil { + t.Errorf("Shared rule %s should have had svc1 removed but did not", expectedRuleName13) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.99.11", 
securityRule13) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName13, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule13) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName13) + } + + if securityRule13.Priority == nil { + t.Errorf("Shared rule %s had no priority", expectedRuleName13) + } + + if securityRule13.Access != network.SecurityRuleAccessAllow { + t.Errorf("Shared rule %s did not have Allow access", expectedRuleName13) + } + + if securityRule13.Direction != network.SecurityRuleDirectionInbound { + t.Errorf("Shared rule %s did not have Inbound direction", expectedRuleName13) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.99.11", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } +} + +func TestIfServiceSpecifiesSharedRuleAndLastServiceIsDeletedThenRuleIsDeleted(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 4444) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := 
getTestService("servicesr2", v1.ProtocolTCP, 8888) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc3 := getTestService("servicesr3", v1.ProtocolTCP, 4444) + svc3.Spec.LoadBalancerIP = "192.168.99.11" + svc3.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + expectedRuleName13 := "shared-TCP-4444-Internet" + expectedRuleName2 := "shared-TCP-8888-Internet" + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc3, to.StringPtr(svc3.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc3: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2, svc3) + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), false) + if err != nil { + t.Errorf("Unexpected error removing svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc3, to.StringPtr(svc3.Spec.LoadBalancerIP), false) + if err != nil { + t.Errorf("Unexpected error removing svc3: %q", err) + } + + validateSecurityGroup(t, sg, svc2) + + _, _, rule13Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName13) + if rule13Found { + t.Fatalf("Expected security rule %q to have been deleted but it was still present", expectedRuleName13) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != 
expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.99.11", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong service's port and IP", expectedRuleName2) + } +} + +func TestCanCombineSharedAndPrivateRulesInSameGroup(t *testing.T) { + az := getTestCloud() + + svc1 := getTestService("servicesr1", v1.ProtocolTCP, 4444) + svc1.Spec.LoadBalancerIP = "192.168.77.88" + svc1.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc2 := getTestService("servicesr2", v1.ProtocolTCP, 8888) + svc2.Spec.LoadBalancerIP = "192.168.33.44" + svc2.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc3 := getTestService("servicesr3", v1.ProtocolTCP, 4444) + svc3.Spec.LoadBalancerIP = "192.168.99.11" + svc3.Annotations[ServiceAnnotationSharedSecurityRule] = "true" + + svc4 := getTestService("servicesr4", v1.ProtocolTCP, 4444) + svc4.Spec.LoadBalancerIP = "192.168.22.33" + svc4.Annotations[ServiceAnnotationSharedSecurityRule] = "false" + + svc5 := getTestService("servicesr5", v1.ProtocolTCP, 8888) + svc5.Spec.LoadBalancerIP = "192.168.22.33" + svc5.Annotations[ServiceAnnotationSharedSecurityRule] = "false" + + expectedRuleName13 := "shared-TCP-4444-Internet" + expectedRuleName2 := "shared-TCP-8888-Internet" + expectedRuleName4 := getSecurityRuleName(&svc4, v1.ServicePort{Port: 4444, Protocol: 
v1.ProtocolTCP}, "Internet") + expectedRuleName5 := getSecurityRuleName(&svc5, v1.ServicePort{Port: 8888, Protocol: v1.ProtocolTCP}, "Internet") + + sg := getTestSecurityGroup(az) + + sg, err := az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc2, to.StringPtr(svc2.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc2: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc3, to.StringPtr(svc3.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc3: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc4, to.StringPtr(svc4.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc4: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc5, to.StringPtr(svc5.Spec.LoadBalancerIP), true) + if err != nil { + t.Errorf("Unexpected error adding svc5: %q", err) + } + + validateSecurityGroup(t, sg, svc1, svc2, svc3, svc4, svc5) + + expectedRuleCount := 4 + if len(*sg.SecurityRules) != expectedRuleCount { + t.Errorf("Expected security group to have %d rules but it had %d", expectedRuleCount, len(*sg.SecurityRules)) + } + + _, securityRule13, rule13Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName13) + if !rule13Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName13) + } + + _, securityRule2, rule2Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + _, securityRule4, rule4Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName4) + if !rule4Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName4) + } + + _, securityRule5, 
rule5Found := findSecurityRuleByName(*sg.SecurityRules, expectedRuleName5) + if !rule5Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName5) + } + + expectedDestinationIPCount13 := 2 + if len(*securityRule13.DestinationAddressPrefixes) != expectedDestinationIPCount13 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName13, expectedDestinationIPCount13, len(*securityRule13.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.77.88", securityRule13) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName13, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.99.11", securityRule13) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName13, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 4444}, "192.168.22.33", securityRule13) + if err == nil { + t.Errorf("Shared rule %s matched wrong (unshared) service's port and IP", expectedRuleName13) + } + + expectedDestinationIPCount2 := 1 + if len(*securityRule2.DestinationAddressPrefixes) != expectedDestinationIPCount2 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName2, expectedDestinationIPCount2, len(*securityRule2.DestinationAddressPrefixes)) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.33.44", securityRule2) + if err != nil { + t.Errorf("Shared rule %s did not match service IP: %v", expectedRuleName2, err) + } + + err = securityRuleMatches("Internet", v1.ServicePort{Port: 8888}, "192.168.22.33", securityRule2) + if err == nil { + t.Errorf("Shared rule %s matched wrong (unshared) service's port and IP", expectedRuleName2) + } + + if securityRule4.DestinationAddressPrefixes != nil { + t.Errorf("Expected unshared rule %s to use single destination IP address but 
used collection", expectedRuleName4) + } + + if securityRule4.DestinationAddressPrefix == nil { + t.Errorf("Expected unshared rule %s to have a destination IP address", expectedRuleName4) + } else { + if !strings.EqualFold(*securityRule4.DestinationAddressPrefix, svc4.Spec.LoadBalancerIP) { + t.Errorf("Expected unshared rule %s to have a destination %s but had %s", expectedRuleName4, svc4.Spec.LoadBalancerIP, *securityRule4.DestinationAddressPrefix) + } + } + + if securityRule5.DestinationAddressPrefixes != nil { + t.Errorf("Expected unshared rule %s to use single destination IP address but used collection", expectedRuleName5) + } + + if securityRule5.DestinationAddressPrefix == nil { + t.Errorf("Expected unshared rule %s to have a destination IP address", expectedRuleName5) + } else { + if !strings.EqualFold(*securityRule5.DestinationAddressPrefix, svc5.Spec.LoadBalancerIP) { + t.Errorf("Expected unshared rule %s to have a destination %s but had %s", expectedRuleName5, svc5.Spec.LoadBalancerIP, *securityRule5.DestinationAddressPrefix) + } + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc1, to.StringPtr(svc1.Spec.LoadBalancerIP), false) + if err != nil { + t.Errorf("Unexpected error removing svc1: %q", err) + } + + sg, err = az.reconcileSecurityGroup(testClusterName, &svc5, to.StringPtr(svc5.Spec.LoadBalancerIP), false) + if err != nil { + t.Errorf("Unexpected error removing svc5: %q", err) + } + + _, securityRule13, rule13Found = findSecurityRuleByName(*sg.SecurityRules, expectedRuleName13) + if !rule13Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName13) + } + + _, securityRule2, rule2Found = findSecurityRuleByName(*sg.SecurityRules, expectedRuleName2) + if !rule2Found { + t.Fatalf("Expected security rule %q but it was not present", expectedRuleName2) + } + + _, securityRule4, rule4Found = findSecurityRuleByName(*sg.SecurityRules, expectedRuleName4) + if !rule4Found { + t.Fatalf("Expected security rule %q but 
it was not present", expectedRuleName4) + } + + _, _, rule5Found = findSecurityRuleByName(*sg.SecurityRules, expectedRuleName5) + if rule5Found { + t.Fatalf("Expected security rule %q to have been removed but it was not present", expectedRuleName5) + } + + expectedDestinationIPCount13 = 1 + if len(*securityRule13.DestinationAddressPrefixes) != expectedDestinationIPCount13 { + t.Errorf("Shared rule %s should have had %d destination IP addresses but had %d", expectedRuleName13, expectedDestinationIPCount13, len(*securityRule13.DestinationAddressPrefixes)) + } +} + +// TODO: sanity check if the same IP address incorrectly gets put in twice? +// (shouldn't happen but...) + +// func TestIfServiceIsEditedFromOwnRuleToSharedRuleThenOwnRuleIsDeletedAndSharedRuleIsCreated(t *testing.T) { +// t.Error() +// } + +// func TestIfServiceIsEditedFromSharedRuleToOwnRuleThenItIsRemovedFromSharedRuleAndOwnRuleIsCreated(t *testing.T) { +// t.Error() +// } diff --git a/pkg/cloudprovider/providers/azure/azure_util.go b/pkg/cloudprovider/providers/azure/azure_util.go index 04ff821e76a..7d2aa565c4a 100644 --- a/pkg/cloudprovider/providers/azure/azure_util.go +++ b/pkg/cloudprovider/providers/azure/azure_util.go @@ -340,6 +340,10 @@ func getLoadBalancerRuleName(service *v1.Service, port v1.ServicePort, subnetNam } func getSecurityRuleName(service *v1.Service, port v1.ServicePort, sourceAddrPrefix string) string { + if useSharedSecurityRule(service) { + safePrefix := strings.Replace(sourceAddrPrefix, "/", "_", -1) + return fmt.Sprintf("shared-%s-%d-%s", port.Protocol, port.Port, safePrefix) + } safePrefix := strings.Replace(sourceAddrPrefix, "/", "_", -1) return fmt.Sprintf("%s-%s-%d-%s", getRulePrefix(service), port.Protocol, port.Port, safePrefix) } From 1e3ec2b639d8dfffe6ecdfa8775d4558f7f4c174 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Fri, 17 Nov 2017 09:33:05 -0800 Subject: [PATCH 17/18] correct doc for reconcileSecurityGroup --- 
pkg/cloudprovider/providers/azure/azure_loadbalancer.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md index 05a560b75b7..141d066cf15 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.md +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.md @@ -22,8 +22,10 @@ Service Annotation for Auto and specific load balancer mode - Call az cloud to CreateOrUpdate on this lb, or Delete if nothing left - return lb, err -- reconcileSecurityGroup(clusterName string, service *v1.Service, lbStatus *v1.LoadBalancerStatus, wantLb bool) (*network.SecurityGroup, error) +- reconcileSecurityGroup(clusterName string, service *v1.Service, lbIP *string, wantLb bool) (*network.SecurityGroup, error) - Go though NSG' properties, update based on wantLb + - Use destinationIPAddress as target address if possible + - Consolidate NSG rules if possible - If any change on the NSG, (the NSG should always exists) - Call az cloud to CreateOrUpdate on this NSG - return sg, err From 35964d4a80e7f931e51860109fd0741b4c93be53 Mon Sep 17 00:00:00 2001 From: Jingtao Ren Date: Mon, 20 Nov 2017 09:53:34 -0800 Subject: [PATCH 18/18] fix rebase test error --- .../providers/azure/azure_test.go | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index db6fac8d3f7..f22182ab1b6 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -581,17 +581,13 @@ func findLBRuleForPort(lbRules []network.LoadBalancingRule, port int32) (network func TestServiceDefaultsToNoSessionPersistence(t *testing.T) { az := getTestCloud() svc := getTestService("service-sa-omitted", v1.ProtocolTCP, 7170) - configProperties := getTestPublicFipConfigurationProperties() - lb := getTestLoadBalancer() 
- nodes := []*v1.Node{} + clusterResources := getClusterResources(az, 1, 1) - lb, _, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling svc1: %q", err) } - validateLoadBalancer(t, lb, svc) - lbRule, err := findLBRuleForPort(*lb.LoadBalancingRules, 7170) if err != nil { t.Error(err) @@ -606,11 +602,9 @@ func TestServiceRespectsNoSessionAffinity(t *testing.T) { az := getTestCloud() svc := getTestService("service-sa-none", v1.ProtocolTCP, 7170) svc.Spec.SessionAffinity = v1.ServiceAffinityNone - configProperties := getTestPublicFipConfigurationProperties() - lb := getTestLoadBalancer() - nodes := []*v1.Node{} + clusterResources := getClusterResources(az, 1, 1) - lb, _, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling svc1: %q", err) } @@ -631,11 +625,9 @@ func TestServiceRespectsClientIPSessionAffinity(t *testing.T) { az := getTestCloud() svc := getTestService("service-sa-clientip", v1.ProtocolTCP, 7170) svc.Spec.SessionAffinity = v1.ServiceAffinityClientIP - configProperties := getTestPublicFipConfigurationProperties() - lb := getTestLoadBalancer() - nodes := []*v1.Node{} + clusterResources := getClusterResources(az, 1, 1) - lb, _, err := az.reconcileLoadBalancer(lb, &configProperties, testClusterName, &svc, nodes) + lb, err := az.reconcileLoadBalancer(testClusterName, &svc, clusterResources.nodes, true /* wantLb */) if err != nil { t.Errorf("Unexpected error reconciling svc1: %q", err) }