diff --git a/pkg/cloudprovider/providers/azure/azure.go b/pkg/cloudprovider/providers/azure/azure.go index 241920e5f04..d42c8d4669e 100644 --- a/pkg/cloudprovider/providers/azure/azure.go +++ b/pkg/cloudprovider/providers/azure/azure.go @@ -135,6 +135,8 @@ type Cloud struct { VirtualMachineScaleSetsClient VirtualMachineScaleSetsClient VirtualMachineScaleSetVMsClient VirtualMachineScaleSetVMsClient + vmCache *timedCache + *BlobDiskController *ManagedDiskController *controllerCommon @@ -244,6 +246,12 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) { az.vmSet = newAvailabilitySet(&az) } + vmCache, err := az.newVMCache() + if err != nil { + return nil, err + } + az.vmCache = vmCache + if err := initDiskControllers(&az); err != nil { return nil, err } diff --git a/pkg/cloudprovider/providers/azure/azure_controllerCommon.go b/pkg/cloudprovider/providers/azure/azure_controllerCommon.go index 463cd04f4a8..e7d70687411 100644 --- a/pkg/cloudprovider/providers/azure/azure_controllerCommon.go +++ b/pkg/cloudprovider/providers/azure/azure_controllerCommon.go @@ -124,7 +124,7 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri } else { glog.V(4).Info("azureDisk - azure attach succeeded") // Invalidate the cache right after updating - vmCache.Delete(vmName) + c.cloud.vmCache.Delete(vmName) } return err } @@ -183,7 +183,7 @@ func (c *controllerCommon) DetachDiskByName(diskName, diskURI string, nodeName t } else { glog.V(4).Info("azureDisk - azure disk detach succeeded") // Invalidate the cache right after updating - vmCache.Delete(vmName) + c.cloud.vmCache.Delete(vmName) } return err } diff --git a/pkg/cloudprovider/providers/azure/azure_test.go b/pkg/cloudprovider/providers/azure/azure_test.go index 21fcf0bbe94..03953383438 100644 --- a/pkg/cloudprovider/providers/azure/azure_test.go +++ b/pkg/cloudprovider/providers/azure/azure_test.go @@ -878,6 +878,7 @@ func getTestCloud() (az *Cloud) { az.VirtualMachineScaleSetVMsClient = newFakeVirtualMachineScaleSetVMsClient() az.VirtualMachinesClient = newFakeAzureVirtualMachinesClient() az.vmSet = newAvailabilitySet(az) + az.vmCache, _ = az.newVMCache() return az } diff --git a/pkg/cloudprovider/providers/azure/azure_wrap.go b/pkg/cloudprovider/providers/azure/azure_wrap.go index e37ac8ca950..70dbd41740d 100644 --- a/pkg/cloudprovider/providers/azure/azure_wrap.go +++ b/pkg/cloudprovider/providers/azure/azure_wrap.go @@ -18,18 +18,20 @@ package azure import ( "net/http" - "sync" "time" "github.com/Azure/azure-sdk-for-go/arm/compute" "github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/go-autorest/autorest" - "github.com/golang/glog" "k8s.io/apimachinery/pkg/types" "k8s.io/kubernetes/pkg/cloudprovider" ) +var ( + vmCacheTTL = time.Minute +) + // checkExistsFromError inspects an error and returns a true if err is nil, // false if error is an autorest.Error with StatusCode=404 and will return the // error back if error is another status code or another type of error. @@ -60,59 +62,21 @@ func ignoreStatusNotFoundFromError(err error) error { return err } -// cache used by getVirtualMachine -// 15s for expiration duration -var vmCache = newTimedcache(15 * time.Second) - -type vmRequest struct { - lock *sync.Mutex - vm *compute.VirtualMachine -} - /// getVirtualMachine calls 'VirtualMachinesClient.Get' with a timed cache /// The service side has throttling control that delays responses if there're multiple requests onto certain vm /// resource request in short period. func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualMachine, err error) { vmName := string(nodeName) - - cachedRequest, err := vmCache.GetOrCreate(vmName, func() interface{} { - return &vmRequest{ - lock: &sync.Mutex{}, - vm: nil, - } - }) + cachedVM, err := az.vmCache.Get(vmName) if err != nil { - return compute.VirtualMachine{}, err - } - request := cachedRequest.(*vmRequest) - - if request.vm == nil { - request.lock.Lock() - defer request.lock.Unlock() - if request.vm == nil { - // Currently InstanceView request are used by azure_zones, while the calls come after non-InstanceView - // request. If we first send an InstanceView request and then a non InstanceView request, the second - // request will still hit throttling. This is what happens now for cloud controller manager: In this - // case we do get instance view every time to fulfill the azure_zones requirement without hitting - // throttling. - // Consider adding separate parameter for controlling 'InstanceView' once node update issue #56276 is fixed - vm, err = az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, compute.InstanceView) - exists, realErr := checkResourceExistsFromError(err) - if realErr != nil { - return vm, realErr - } - - if !exists { - return vm, cloudprovider.InstanceNotFound - } - - request.vm = &vm - } - return *request.vm, nil + return vm, err } - glog.V(6).Infof("getVirtualMachine hits cache for(%s)", vmName) - return *request.vm, nil + if cachedVM == nil { + return vm, cloudprovider.InstanceNotFound + } + + return *(cachedVM.(*compute.VirtualMachine)), nil } func (az *Cloud) getRouteTable() (routeTable network.RouteTable, exists bool, err error) { @@ -189,3 +153,21 @@ func (az *Cloud) getAzureLoadBalancer(name string) (lb network.LoadBalancer, exi return lb, exists, err } + +func (az *Cloud) newVMCache() (*timedCache, error) { + getter := func(key string) (interface{}, error) { + vm, err := az.VirtualMachinesClient.Get(az.ResourceGroup, key, compute.InstanceView) + exists, realErr := checkResourceExistsFromError(err) + if realErr != nil { + return nil, realErr + } + + if !exists { + return nil, nil + } + + return &vm, nil + } + + return newTimedcache(vmCacheTTL, getter) +}