diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap.go
index 2686d0a6c13..e1c2810a457 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap.go
@@ -23,6 +23,7 @@ import (
 	"net/http"
 	"regexp"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
@@ -44,6 +45,9 @@ var (
 	azureResourceGroupNameRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/(?:.*)`)
 )
 
+// vmListCacheKey is the single key stored in vmCache; its value is a *sync.Map mapping VM name to *vmEntry.
+const vmListCacheKey = "vmListCacheKey"
+
-// checkExistsFromError inspects an error and returns a true if err is nil,
-// false if error is an autorest.Error with StatusCode=404 and will return the
-// error back if error is another status code or another type of error.
+// checkResourceExistsFromError inspects an error and returns true if err is nil,
+// false if the error is an autorest.Error with StatusCode=404, and passes the
+// error back if it carries another status code or is another type of error.
@@ -62,18 +65,50 @@ func checkResourceExistsFromError(err *retry.Error) (bool, *retry.Error) {
 
-/// getVirtualMachine calls 'VirtualMachinesClient.Get' with a timed cache
-/// The service side has throttling control that delays responses if there're multiple requests onto certain vm
-/// resource request in short period.
+// getVirtualMachine looks a node's VM up in a timed cache populated by a single
+// 'VirtualMachinesClient.List' call; the service side throttles requests that hit
+// the same VM resource repeatedly in a short period, so lookups share that cache.
-func (az *Cloud) getVirtualMachine(nodeName types.NodeName, crt cacheReadType) (vm compute.VirtualMachine, err error) {
+func (az *Cloud) getVirtualMachine(nodeName types.NodeName, crt cacheReadType) (compute.VirtualMachine, error) {
 	vmName := string(nodeName)
-	cachedVM, err := az.vmCache.Get(vmName, crt)
+
+	getter := func(vmName string) (*compute.VirtualMachine, error) {
+		cachedVM, err := az.vmCache.Get(vmListCacheKey, crt)
+		if err != nil {
+			return nil, err
+		}
+		if cachedVM == nil {
+			return nil, nil
+		}
+
+		virtualMachines := cachedVM.(*sync.Map)
+		if vm, ok := virtualMachines.Load(vmName); ok {
+			result := vm.(*vmEntry)
+			return result.vm, nil
+		}
+
+		return nil, nil
+	}
+
+	vm, err := getter(vmName)
 	if err != nil {
-		return vm, err
+		return compute.VirtualMachine{}, err
 	}
-	if cachedVM == nil {
-		return vm, cloudprovider.InstanceNotFound
+	if vm != nil {
+		return *vm, nil
 	}
 
-	return *(cachedVM.(*compute.VirtualMachine)), nil
+	// Cache miss: the VM may have been created after the list was cached, so
+	// invalidate the cached list and retry once before giving up.
+	klog.V(2).Infof("Couldn't find VM with name %s, refreshing the cache", vmName)
+	az.vmCache.Delete(vmListCacheKey)
+
+	vm, err = getter(vmName)
+	if err != nil {
+		return compute.VirtualMachine{}, err
+	}
+
+	if vm == nil {
+		return compute.VirtualMachine{}, cloudprovider.InstanceNotFound
+	}
+	return *vm, nil
 }
 
 func (az *Cloud) getRouteTable(crt cacheReadType) (routeTable network.RouteTable, exists bool, err error) {
@@ -166,6 +195,12 @@ func (az *Cloud) getSecurityGroup(crt cacheReadType) (network.SecurityGroup, err
 	return *(securityGroup.(*network.SecurityGroup)), nil
 }
 
+// vmEntry pairs a cached VM with the time it was cached.
+type vmEntry struct {
+	vm         *compute.VirtualMachine
+	lastUpdate time.Time
+}
+
 func (az *Cloud) newVMCache() (*timedCache, error) {
 	getter := func(key string) (interface{}, error) {
 		// Currently InstanceView request are used by azure_zones, while the calls come after non-InstanceView
@@ -182,18 +216,31 @@ func (az *Cloud) newVMCache() (*timedCache, error) {
 			return nil, err
 		}
 
-		vm, verr := az.VirtualMachinesClient.Get(ctx, resourceGroup, key, compute.InstanceView)
+		vmList, verr := az.VirtualMachinesClient.List(ctx, resourceGroup)
 		exists, rerr := checkResourceExistsFromError(verr)
 		if rerr != nil {
 			return nil, rerr.Error()
 		}
 
 		if !exists {
-			klog.V(2).Infof("Virtual machine %q not found", key)
+			klog.V(2).Infof("No virtual machines found under resource group %q", resourceGroup)
 			return nil, nil
 		}
 
-		return &vm, nil
+		localCache := &sync.Map{}
+		for i := range vmList {
+			vm := vmList[i] // copy: &vm below must point at this iteration's element, not a shared loop variable
+			if vm.Name == nil || *vm.Name == "" {
+				klog.Warning("failed to get the name of a VM from the list result, skipping it")
+				continue
+			}
+			localCache.Store(*vm.Name, &vmEntry{
+				vm:         &vm,
+				lastUpdate: time.Now().UTC(),
+			})
+		}
+
+		return localCache, nil
 	}
 
 	if az.VMCacheTTLInSeconds == 0 {
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap_test.go
index 73d9b69fd7f..8bf32faa805 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_wrap_test.go
@@ -19,12 +19,19 @@ limitations under the License.
 package azure
 
 import (
+	"context"
 	"net/http"
 	"reflect"
 	"testing"
 
+	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
+	"github.com/Azure/go-autorest/autorest/to"
 	"github.com/stretchr/testify/assert"
+
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/legacy-cloud-providers/azure/auth"
 	"k8s.io/legacy-cloud-providers/azure/retry"
 )
@@ -282,3 +289,65 @@ func TestIsBackendPoolOnSameLB(t *testing.T) {
 		assert.Equal(t, test.expectedLBName, lbName)
 	}
 }
+
+func TestVMCache(t *testing.T) {
+	vmList := []string{"vm000000", "vm000001", "vm000002"}
+	az := getTestCloudForVMCache(vmList)
+
+	// Validate that every VM returned by List can also be fetched through the cache.
+	virtualMachines, err := az.VirtualMachinesClient.List(
+		context.Background(), "rg")
+	assert.Nil(t, err)
+	assert.Equal(t, 3, len(virtualMachines))
+	for i := range virtualMachines {
+		vm := virtualMachines[i]
+		vmName := to.String(vm.Name)
+		realVM, err := az.getVirtualMachine(types.NodeName(vmName), cacheReadTypeDefault)
+		assert.NoError(t, err)
+		assert.Equal(t, vm, realVM)
+	}
+}
+
+func getTestCloudForVMCache(vmList []string) (az *Cloud) {
+	az = &Cloud{
+		Config: Config{
+			AzureAuthConfig: auth.AzureAuthConfig{
+				TenantID:       "tenant",
+				SubscriptionID: "subscription",
+			},
+			ResourceGroup:                "rg",
+			VnetResourceGroup:            "rg",
+			RouteTableResourceGroup:      "rg",
+			Location:                     "westus",
+			VnetName:                     "vnet",
+			SubnetName:                   "subnet",
+			SecurityGroupName:            "nsg",
+			RouteTableName:               "rt",
+			PrimaryAvailabilitySetName:   "as",
+			MaximumLoadBalancerRuleCount: 250,
+			VMType:                       vmTypeStandard,
+		},
+		nodeZones:          map[string]sets.String{},
+		nodeResourceGroups: map[string]string{},
+		unmanagedNodes:     sets.NewString(),
+		routeCIDRs:         map[string]string{},
+		eventRecorder:      &record.FakeRecorder{},
+	}
+	virtualMachinesClient := newFakeAzureVirtualMachinesClient()
+
+	store := make(map[string]map[string]compute.VirtualMachine)
+	store["rg"] = map[string]compute.VirtualMachine{}
+	for _, vm := range vmList {
+		store["rg"][vm] = compute.VirtualMachine{
+			// to.StringPtr copies the loop value; taking &vm here would alias the loop variable.
+			Name: to.StringPtr(vm),
+		}
+	}
+
+	virtualMachinesClient.setFakeStore(store)
+	az.VirtualMachinesClient = virtualMachinesClient
+	az.vmCache, _ = az.newVMCache()
+	az.controllerCommon = &controllerCommon{cloud: az}
+
+	return az
+}
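
Taken together, the azure_wrap.go hunks replace one cache slot per VM (filled by `VirtualMachinesClient.Get`) with a single slot holding a name-indexed snapshot of `VirtualMachinesClient.List`, plus a delete-and-refetch when a name is missing. Below is a minimal, self-contained sketch of that pattern, not the cloud provider's actual types: `listCache`, `entry`, and `fetchAll` are hypothetical stand-ins for the real `timedCache`, `vmEntry`, and Azure client.

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// entry mirrors the diff's vmEntry: a cached value plus the time it was cached.
type entry struct {
	value      string
	lastUpdate time.Time
}

// listCache keeps one shared, name-indexed snapshot produced by a single
// List-style call, instead of one cache slot per resource.
type listCache struct {
	mu       sync.Mutex
	snapshot *sync.Map                         // name -> *entry; nil until first fill
	fetchAll func() (map[string]string, error) // hypothetical stand-in for VirtualMachinesClient.List
}

// get returns the named value, refreshing the whole snapshot once on a miss,
// since the resource may have been created after the last List.
func (c *listCache) get(name string) (string, bool, error) {
	for attempt := 0; attempt < 2; attempt++ {
		m, err := c.load(attempt == 1) // force a refresh on the second attempt
		if err != nil {
			return "", false, err
		}
		if v, ok := m.Load(name); ok {
			return v.(*entry).value, true, nil
		}
	}
	return "", false, nil // genuine miss: the caller maps this to InstanceNotFound
}

// load returns the current snapshot, rebuilding it when empty or when a
// refresh is forced; this plays the role of timedCache.Get plus Delete.
func (c *listCache) load(refresh bool) (*sync.Map, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.snapshot != nil && !refresh {
		return c.snapshot, nil
	}
	all, err := c.fetchAll()
	if err != nil {
		return nil, err
	}
	m := &sync.Map{}
	now := time.Now().UTC()
	for k, v := range all {
		m.Store(k, &entry{value: v, lastUpdate: now})
	}
	c.snapshot = m
	return m, nil
}

func main() {
	c := &listCache{fetchAll: func() (map[string]string, error) {
		return map[string]string{"vm000000": "running"}, nil
	}}
	v, found, _ := c.get("vm000000")
	fmt.Println(v, found) // running true
	_, found, _ = c.get("vm000001")
	fmt.Println(found) // false, after one forced refresh
}
```

The single-key design trades per-VM freshness for far fewer ARM calls: one List per TTL window serves every node lookup, which is exactly the throttling relief the diff's comment describes.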
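The two range-loop fixes above (`vm := vmList[i]` in the cache getter, `to.StringPtr(vm)` in the test) matter because, before Go 1.22, a `for ... range` loop reuses a single loop variable, so storing `&vm` makes every entry point at the last element. A tiny demo of the pitfall and the copy idiom, as a sketch; the version-dependent output is noted in the comments.

```go
package main

import "fmt"

func main() {
	names := []string{"vm000000", "vm000001", "vm000002"}

	// Buggy pattern (pre-Go 1.22 semantics): &n aliases the single loop
	// variable, so every stored pointer ends up pointing at the last element.
	bad := make([]*string, 0, len(names))
	for _, n := range names {
		bad = append(bad, &n)
	}

	// Fixed pattern used in the diff: copy into a fresh variable per iteration
	// so each pointer refers to its own element.
	good := make([]*string, 0, len(names))
	for i := range names {
		n := names[i]
		good = append(good, &n)
	}

	fmt.Println(*bad[0], *bad[1])   // pre-1.22: "vm000002 vm000002"; 1.22+: "vm000000 vm000001"
	fmt.Println(*good[0], *good[1]) // "vm000000 vm000001" on all versions
}
```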