Merge pull request #59520 from feiskyer/new-cache

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Add generic cache for Azure VM/LB/NSG/RouteTable

**What this PR does / why we need it**:

Part of #58770. This PR adds a generic TTL-based cache for Azure VM/LB/NSG/RouteTable resources, so repeated reads are served locally instead of issuing ARM (Azure Resource Manager) calls.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Part of #58770

**Special notes for your reviewer**:
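
For orientation: the new `timedCache` builds on client-go's TTL store, serving reads from the store and falling back to a per-key getter on a miss or after the TTL expires. Below is a minimal, self-contained sketch of that idea; the `entry` type, key, and value are illustrative, not this PR's actual code:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/tools/cache"
)

// entry mirrors the PR's cacheEntry: a key plus the cached data.
type entry struct {
	key  string
	data interface{}
}

func main() {
	// TTLStore evicts entries older than the TTL; the PR's timedCache is
	// built on top of it.
	store := cache.NewTTLStore(func(obj interface{}) (string, error) {
		return obj.(*entry).key, nil
	}, 2*time.Minute)

	key := "my-lb"
	if _, exists, _ := store.GetByKey(key); !exists {
		// Cache miss: fetch from the backing source (ARM in this PR).
		store.Add(&entry{key: key, data: "fetched-from-ARM"})
	}

	obj, _, _ := store.GetByKey(key)
	fmt.Println(obj.(*entry).data) // served from cache until the TTL expires
}
```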

**Release note**:

```release-note
Add generic cache for Azure VM/LB/NSG/RouteTable
```
Authored by Kubernetes Submit Queue, committed by GitHub on 2018-02-10 19:06:02 -08:00
commit f0e573d6d5
10 changed files with 367 additions and 163 deletions

View File

@@ -135,6 +135,11 @@ type Cloud struct {
 	VirtualMachineScaleSetsClient   VirtualMachineScaleSetsClient
 	VirtualMachineScaleSetVMsClient VirtualMachineScaleSetVMsClient
+
+	vmCache  *timedCache
+	lbCache  *timedCache
+	nsgCache *timedCache
+	rtCache  *timedCache
 
 	*BlobDiskController
 	*ManagedDiskController
 	*controllerCommon
@@ -244,6 +249,26 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
 		az.vmSet = newAvailabilitySet(&az)
 	}
 
+	az.vmCache, err = az.newVMCache()
+	if err != nil {
+		return nil, err
+	}
+
+	az.lbCache, err = az.newLBCache()
+	if err != nil {
+		return nil, err
+	}
+
+	az.nsgCache, err = az.newNSGCache()
+	if err != nil {
+		return nil, err
+	}
+
+	az.rtCache, err = az.newRouteTableCache()
+	if err != nil {
+		return nil, err
+	}
+
 	if err := initDiskControllers(&az); err != nil {
 		return nil, err
 	}

View File

@@ -131,7 +131,12 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
 		resp := <-respChan
 		err := <-errChan
 		glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name)
-		return processRetryResponse(resp.Response, err)
+		done, err := processRetryResponse(resp.Response, err)
+		if done && err == nil {
+			// Invalidate the cache right after updating
+			az.nsgCache.Delete(*sg.Name)
+		}
+		return done, err
 	})
 }
 
@@ -142,7 +147,12 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
 		resp := <-respChan
 		err := <-errChan
 		glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
-		return processRetryResponse(resp.Response, err)
+		done, err := processRetryResponse(resp.Response, err)
+		if done && err == nil {
+			// Invalidate the cache right after updating
+			az.lbCache.Delete(*lb.Name)
+		}
+		return done, err
 	})
 }
 
@@ -283,7 +293,12 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error {
 		respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
 		resp := <-respChan
 		err := <-errChan
-		return processRetryResponse(resp, err)
+		done, err := processRetryResponse(resp, err)
+		if done && err == nil {
+			// Invalidate the cache right after deleting
+			az.lbCache.Delete(lbName)
+		}
+		return done, err
 	})
 }
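
The three hunks above share one ordering rule: the cached entry is dropped only after `processRetryResponse` reports a completed, successful operation, so a failed ARM call never evicts a still-valid entry. A hypothetical helper making that ordering explicit (`updateAndInvalidate` and the map-backed cache are illustrative, not part of this PR):

```go
package main

import "fmt"

// updateAndInvalidate mirrors the pattern above: invalidate only after a
// successful write, so the next read re-fetches the fresh resource. On
// failure the old entry is kept, and the TTL still bounds its lifetime.
func updateAndInvalidate(write func() error, invalidate func()) error {
	if err := write(); err != nil {
		return err
	}
	invalidate()
	return nil
}

func main() {
	cached := map[string]string{"nsg1": "old-revision"}
	_ = updateAndInvalidate(
		func() error { return nil },       // the ARM update succeeded
		func() { delete(cached, "nsg1") }, // cf. az.nsgCache.Delete(...)
	)
	fmt.Println(cached) // map[]; the next Get repopulates via the getter
}
```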

View File

@@ -17,40 +17,58 @@ limitations under the License.
 package azure
 
 import (
+	"fmt"
 	"sync"
 	"time"
 
 	"k8s.io/client-go/tools/cache"
 )
 
-type timedcacheEntry struct {
+// getFunc defines a getter function for timedCache.
+type getFunc func(key string) (interface{}, error)
+
+// cacheEntry is the internal structure stores inside TTLStore.
+type cacheEntry struct {
 	key  string
 	data interface{}
-}
 
-type timedcache struct {
-	store cache.Store
-	lock  sync.Mutex
+	// The lock to ensure not updating same entry simultaneously.
+	lock sync.Mutex
 }
 
-// ttl time.Duration
-func newTimedcache(ttl time.Duration) timedcache {
-	return timedcache{
-		store: cache.NewTTLStore(cacheKeyFunc, ttl),
-	}
-}
-
+// cacheKeyFunc defines the key function required in TTLStore.
 func cacheKeyFunc(obj interface{}) (string, error) {
-	return obj.(*timedcacheEntry).key, nil
+	return obj.(*cacheEntry).key, nil
 }
 
-func (t *timedcache) GetOrCreate(key string, createFunc func() interface{}) (interface{}, error) {
+// timedCache is a cache with TTL.
+type timedCache struct {
+	store  cache.Store
+	lock   sync.Mutex
+	getter getFunc
+}
+
+// newTimedcache creates a new timedCache.
+func newTimedcache(ttl time.Duration, getter getFunc) (*timedCache, error) {
+	if getter == nil {
+		return nil, fmt.Errorf("getter is not provided")
+	}
+
+	return &timedCache{
+		getter: getter,
+		store:  cache.NewTTLStore(cacheKeyFunc, ttl),
+	}, nil
+}
+
+// getInternal returns cacheEntry by key. If the key is not cached yet,
+// it returns a cacheEntry with nil data.
+func (t *timedCache) getInternal(key string) (*cacheEntry, error) {
 	entry, exists, err := t.store.GetByKey(key)
 	if err != nil {
 		return nil, err
 	}
 	if exists {
-		return (entry.(*timedcacheEntry)).data, nil
+		return entry.(*cacheEntry), nil
 	}
 
 	t.lock.Lock()
@@ -60,22 +78,47 @@ func (t *timedcache) GetOrCreate(key string, createFunc func() interface{}) (interface{}, error) {
 		return nil, err
 	}
 	if exists {
-		return (entry.(*timedcacheEntry)).data, nil
+		return entry.(*cacheEntry), nil
 	}
 
-	if createFunc == nil {
-		return nil, nil
-	}
-	created := createFunc()
-	t.store.Add(&timedcacheEntry{
+	// Still not found, add new entry with nil data.
+	// Note the data will be filled later by getter.
+	newEntry := &cacheEntry{
 		key:  key,
-		data: created,
-	})
-	return created, nil
+		data: nil,
+	}
+	t.store.Add(newEntry)
+	return newEntry, nil
 }
 
-func (t *timedcache) Delete(key string) {
-	_ = t.store.Delete(&timedcacheEntry{
+// Get returns the requested item by key.
+func (t *timedCache) Get(key string) (interface{}, error) {
+	entry, err := t.getInternal(key)
+	if err != nil {
+		return nil, err
+	}
+
+	// Data is still not cached yet, cache it by getter.
+	if entry.data == nil {
+		entry.lock.Lock()
+		defer entry.lock.Unlock()
+
+		if entry.data == nil {
+			data, err := t.getter(key)
+			if err != nil {
+				return nil, err
+			}
+
+			entry.data = data
+		}
+	}
+
+	return entry.data, nil
+}
+
+// Delete removes an item from the cache.
+func (t *timedCache) Delete(key string) error {
+	return t.store.Delete(&cacheEntry{
 		key: key,
 	})
 }
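
`Get` above uses double-checked locking: `getInternal` creates the entry (with nil data) under the store lock, and the per-entry lock ensures only one caller runs the getter while concurrent readers of the same key wait. A stripped-down, runnable model of just that concurrency behavior (no TTL; the types and names are illustrative, not the code above):

```go
package main

import (
	"fmt"
	"sync"
)

type entry struct {
	mu   sync.Mutex
	data interface{}
}

type getterCache struct {
	mu      sync.Mutex
	entries map[string]*entry
	getter  func(key string) (interface{}, error)
}

func (c *getterCache) Get(key string) (interface{}, error) {
	// First lock: create the (empty) entry exactly once per key.
	c.mu.Lock()
	e, ok := c.entries[key]
	if !ok {
		e = &entry{}
		c.entries[key] = e
	}
	c.mu.Unlock()

	// Second lock: fill the entry's data exactly once via the getter.
	e.mu.Lock()
	defer e.mu.Unlock()
	if e.data == nil { // re-check under the entry lock
		data, err := c.getter(key)
		if err != nil {
			return nil, err
		}
		e.data = data
	}
	return e.data, nil
}

func main() {
	calls := 0
	c := &getterCache{
		entries: map[string]*entry{},
		getter: func(key string) (interface{}, error) {
			calls++ // serialized by the entry lock
			return "vm-" + key, nil
		},
	}

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.Get("node-1")
		}()
	}
	wg.Wait()
	fmt.Println(calls) // prints 1: ten concurrent Gets, one fetch
}
```

One consequence worth noting in review: absence is modeled as nil data, so a key whose getter keeps returning nil (a resource that does not exist) is re-fetched on every `Get` rather than negatively cached for the TTL.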

View File

@@ -17,80 +17,144 @@ limitations under the License.
 package azure
 
 import (
-	"sync/atomic"
+	"fmt"
+	"sync"
 	"testing"
 	"time"
+
+	"github.com/stretchr/testify/assert"
 )
 
-func TestCacheReturnsSameObject(t *testing.T) {
-	type cacheTestingStruct struct{}
-	c := newTimedcache(1 * time.Minute)
-	o1 := cacheTestingStruct{}
-	get1, _ := c.GetOrCreate("b1", func() interface{} {
-		return o1
-	})
-	o2 := cacheTestingStruct{}
-	get2, _ := c.GetOrCreate("b1", func() interface{} {
-		return o2
-	})
-	if get1 != get2 {
-		t.Error("Get not equal")
+var (
+	fakeCacheTTL = 2 * time.Second
+)
+
+type fakeDataObj struct{}
+
+type fakeDataSource struct {
+	called int
+	data   map[string]*fakeDataObj
+	lock   sync.Mutex
+}
+
+func (fake *fakeDataSource) get(key string) (interface{}, error) {
+	fake.lock.Lock()
+	defer fake.lock.Unlock()
+
+	fake.called = fake.called + 1
+	if v, ok := fake.data[key]; ok {
+		return v, nil
+	}
+
+	return nil, nil
+}
+
+func (fake *fakeDataSource) set(data map[string]*fakeDataObj) {
+	fake.lock.Lock()
+	defer fake.lock.Unlock()
+
+	fake.data = data
+	fake.called = 0
+}
+
+func newFakeCache(t *testing.T) (*fakeDataSource, *timedCache) {
+	dataSource := &fakeDataSource{
+		data: make(map[string]*fakeDataObj),
+	}
+	getter := dataSource.get
+	cache, err := newTimedcache(fakeCacheTTL, getter)
+	assert.NoError(t, err)
+	return dataSource, cache
+}
+
+func TestCacheGet(t *testing.T) {
+	val := &fakeDataObj{}
+	cases := []struct {
+		name     string
+		data     map[string]*fakeDataObj
+		key      string
+		expected interface{}
+	}{
+		{
+			name:     "cache should return nil for empty data source",
+			key:      "key1",
+			expected: nil,
+		},
+		{
+			name:     "cache should return nil for non exist key",
+			data:     map[string]*fakeDataObj{"key2": val},
+			key:      "key1",
+			expected: nil,
+		},
+		{
+			name:     "cache should return data for existing key",
+			data:     map[string]*fakeDataObj{"key1": val},
+			key:      "key1",
+			expected: val,
+		},
+	}
+
+	for _, c := range cases {
+		dataSource, cache := newFakeCache(t)
+		dataSource.set(c.data)
+		val, err := cache.Get(c.key)
+		assert.NoError(t, err, c.name)
+		assert.Equal(t, c.expected, val, c.name)
 	}
 }
 
-func TestCacheCallsCreateFuncOnce(t *testing.T) {
-	var callsCount uint32
-	f1 := func() interface{} {
-		atomic.AddUint32(&callsCount, 1)
-		return 1
-	}
-	c := newTimedcache(500 * time.Millisecond)
-	for index := 0; index < 20; index++ {
-		_, _ = c.GetOrCreate("b1", f1)
-	}
-
-	if callsCount != 1 {
-		t.Error("Count not match")
-	}
-	time.Sleep(500 * time.Millisecond)
-	c.GetOrCreate("b1", f1)
-	if callsCount != 2 {
-		t.Error("Count not match")
-	}
-}
-
-func TestCacheExpires(t *testing.T) {
-	f1 := func() interface{} {
-		return 1
-	}
-	c := newTimedcache(500 * time.Millisecond)
-	get1, _ := c.GetOrCreate("b1", f1)
-	if get1 != 1 {
-		t.Error("Value not equal")
-	}
-	time.Sleep(500 * time.Millisecond)
-	get1, _ = c.GetOrCreate("b1", nil)
-	if get1 != nil {
-		t.Error("value not expired")
-	}
-}
+func TestCacheGetError(t *testing.T) {
+	getError := fmt.Errorf("getError")
+	getter := func(key string) (interface{}, error) {
+		return nil, getError
+	}
+	cache, err := newTimedcache(fakeCacheTTL, getter)
+	assert.NoError(t, err)
+
+	val, err := cache.Get("key")
+	assert.Error(t, err)
+	assert.Equal(t, getError, err)
+	assert.Nil(t, val)
+}
 
 func TestCacheDelete(t *testing.T) {
-	f1 := func() interface{} {
-		return 1
-	}
-	c := newTimedcache(500 * time.Millisecond)
-	get1, _ := c.GetOrCreate("b1", f1)
-	if get1 != 1 {
-		t.Error("Value not equal")
-	}
-	get1, _ = c.GetOrCreate("b1", nil)
-	if get1 != 1 {
-		t.Error("Value not equal")
-	}
-	c.Delete("b1")
-	get1, _ = c.GetOrCreate("b1", nil)
-	if get1 != nil {
-		t.Error("value not deleted")
+	key := "key1"
+	val := &fakeDataObj{}
+	data := map[string]*fakeDataObj{
+		key: val,
 	}
+	dataSource, cache := newFakeCache(t)
+	dataSource.set(data)
+
+	v, err := cache.Get(key)
+	assert.NoError(t, err)
+	assert.Equal(t, val, v, "cache should get correct data")
+
+	dataSource.set(nil)
+	cache.Delete(key)
+	v, err = cache.Get(key)
+	assert.NoError(t, err)
+	assert.Equal(t, 1, dataSource.called)
+	assert.Equal(t, nil, v, "cache should get nil after data is removed")
+}
+
+func TestCacheExpired(t *testing.T) {
+	key := "key1"
+	val := &fakeDataObj{}
+	data := map[string]*fakeDataObj{
+		key: val,
+	}
+	dataSource, cache := newFakeCache(t)
+	dataSource.set(data)
+
+	v, err := cache.Get(key)
+	assert.NoError(t, err)
+	assert.Equal(t, 1, dataSource.called)
+	assert.Equal(t, val, v, "cache should get correct data")
+
+	time.Sleep(fakeCacheTTL)
+	v, err = cache.Get(key)
+	assert.NoError(t, err)
+	assert.Equal(t, 2, dataSource.called)
+	assert.Equal(t, val, v, "cache should get correct data even after expired")
 }

View File

@@ -124,7 +124,7 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI string, nodeName types.NodeName, lun int32, cachingMode compute.CachingTypes) error {
 	} else {
 		glog.V(4).Info("azureDisk - azure attach succeeded")
 		// Invalidate the cache right after updating
-		vmCache.Delete(vmName)
+		c.cloud.vmCache.Delete(vmName)
 	}
 	return err
 }
@@ -183,7 +183,7 @@ func (c *controllerCommon) DetachDiskByName(diskName, diskURI string, nodeName types.NodeName) error {
 	} else {
 		glog.V(4).Info("azureDisk - azure disk detach succeeded")
 		// Invalidate the cache right after updating
-		vmCache.Delete(vmName)
+		c.cloud.vmCache.Delete(vmName)
 	}
 	return err
 }

View File

@@ -819,7 +819,7 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, lbIP *string, wantLb bool) (*network.SecurityGroup, error) {
 		ports = []v1.ServicePort{}
 	}
 
-	sg, err := az.SecurityGroupsClient.Get(az.ResourceGroup, az.SecurityGroupName, "")
+	sg, err := az.getSecurityGroup()
 	if err != nil {
 		return nil, err
 	}

View File

@@ -98,10 +98,9 @@ func (az *Cloud) createRouteTable() error {
 		return err
 	}
 
-	glog.V(10).Infof("RouteTablesClient.Get(%q): start", az.RouteTableName)
-	_, err = az.RouteTablesClient.Get(az.ResourceGroup, az.RouteTableName, "")
-	glog.V(10).Infof("RouteTablesClient.Get(%q): end", az.RouteTableName)
-	return err
+	// Invalidate the cache right after updating
+	az.rtCache.Delete(az.RouteTableName)
+	return nil
 }
 
 // CreateRoute creates the described managed route

View File

@@ -37,6 +37,7 @@ func TestCreateRoute(t *testing.T) {
 			Location: "location",
 		},
 	}
+	cloud.rtCache, _ = cloud.newRouteTableCache()
 	expectedTable := network.RouteTable{
 		Name:     &cloud.RouteTableName,
 		Location: &cloud.Location,

View File

@@ -878,6 +878,10 @@ func getTestCloud() (az *Cloud) {
 	az.VirtualMachineScaleSetVMsClient = newFakeVirtualMachineScaleSetVMsClient()
 	az.VirtualMachinesClient = newFakeAzureVirtualMachinesClient()
 	az.vmSet = newAvailabilitySet(az)
+	az.vmCache, _ = az.newVMCache()
+	az.lbCache, _ = az.newLBCache()
+	az.nsgCache, _ = az.newNSGCache()
+	az.rtCache, _ = az.newRouteTableCache()
 
 	return az
 }

View File

@@ -17,19 +17,25 @@ limitations under the License.
 package azure
 
 import (
+	"fmt"
 	"net/http"
-	"sync"
 	"time"
 
 	"github.com/Azure/azure-sdk-for-go/arm/compute"
 	"github.com/Azure/azure-sdk-for-go/arm/network"
 	"github.com/Azure/go-autorest/autorest"
-	"github.com/golang/glog"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/kubernetes/pkg/cloudprovider"
 )
 
+var (
+	vmCacheTTL  = time.Minute
+	lbCacheTTL  = 2 * time.Minute
+	nsgCacheTTL = 2 * time.Minute
+	rtCacheTTL  = 2 * time.Minute
+)
+
 // checkExistsFromError inspects an error and returns a true if err is nil,
 // false if error is an autorest.Error with StatusCode=404 and will return the
 // error back if error is another status code or another type of error.
@@ -60,75 +66,34 @@ func ignoreStatusNotFoundFromError(err error) error {
 	return err
 }
 
-// cache used by getVirtualMachine
-// 15s for expiration duration
-var vmCache = newTimedcache(15 * time.Second)
-
-type vmRequest struct {
-	lock *sync.Mutex
-	vm   *compute.VirtualMachine
-}
-
 /// getVirtualMachine calls 'VirtualMachinesClient.Get' with a timed cache
 /// The service side has throttling control that delays responses if there're multiple requests onto certain vm
 /// resource request in short period.
 func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualMachine, err error) {
 	vmName := string(nodeName)
-
-	cachedRequest, err := vmCache.GetOrCreate(vmName, func() interface{} {
-		return &vmRequest{
-			lock: &sync.Mutex{},
-			vm:   nil,
-		}
-	})
+	cachedVM, err := az.vmCache.Get(vmName)
 	if err != nil {
-		return compute.VirtualMachine{}, err
+		return vm, err
 	}
-	request := cachedRequest.(*vmRequest)
 
-	if request.vm == nil {
-		request.lock.Lock()
-		defer request.lock.Unlock()
-		if request.vm == nil {
-			// Currently InstanceView request are used by azure_zones, while the calls come after non-InstanceView
-			// request. If we first send an InstanceView request and then a non InstanceView request, the second
-			// request will still hit throttling. This is what happens now for cloud controller manager: In this
-			// case we do get instance view every time to fulfill the azure_zones requirement without hitting
-			// throttling.
-			// Consider adding separate parameter for controlling 'InstanceView' once node update issue #56276 is fixed
-			vm, err = az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, compute.InstanceView)
-			exists, realErr := checkResourceExistsFromError(err)
-			if realErr != nil {
-				return vm, realErr
-			}
-
-			if !exists {
-				return vm, cloudprovider.InstanceNotFound
-			}
-
-			request.vm = &vm
-		}
-		return *request.vm, nil
+	if cachedVM == nil {
+		return vm, cloudprovider.InstanceNotFound
 	}
 
-	glog.V(6).Infof("getVirtualMachine hits cache for(%s)", vmName)
-	return *request.vm, nil
+	return *(cachedVM.(*compute.VirtualMachine)), nil
 }
 
 func (az *Cloud) getRouteTable() (routeTable network.RouteTable, exists bool, err error) {
-	var realErr error
-
-	routeTable, err = az.RouteTablesClient.Get(az.ResourceGroup, az.RouteTableName, "")
-	exists, realErr = checkResourceExistsFromError(err)
-	if realErr != nil {
-		return routeTable, false, realErr
+	cachedRt, err := az.rtCache.Get(az.RouteTableName)
+	if err != nil {
+		return routeTable, false, err
 	}
 
-	if !exists {
+	if cachedRt == nil {
 		return routeTable, false, nil
 	}
 
-	return routeTable, exists, err
+	return *(cachedRt.(*network.RouteTable)), true, nil
 }
 
 func (az *Cloud) getPublicIPAddress(pipResourceGroup string, pipName string) (pip network.PublicIPAddress, exists bool, err error) {
@@ -175,17 +140,105 @@ func (az *Cloud) getSubnet(virtualNetworkName string, subnetName string) (subnet network.Subnet, exists bool, err error) {
 }
 
 func (az *Cloud) getAzureLoadBalancer(name string) (lb network.LoadBalancer, exists bool, err error) {
-	var realErr error
-
-	lb, err = az.LoadBalancerClient.Get(az.ResourceGroup, name, "")
-	exists, realErr = checkResourceExistsFromError(err)
-	if realErr != nil {
-		return lb, false, realErr
+	cachedLB, err := az.lbCache.Get(name)
+	if err != nil {
+		return lb, false, err
 	}
 
-	if !exists {
+	if cachedLB == nil {
 		return lb, false, nil
 	}
 
-	return lb, exists, err
+	return *(cachedLB.(*network.LoadBalancer)), true, nil
+}
+
+func (az *Cloud) getSecurityGroup() (nsg network.SecurityGroup, err error) {
+	securityGroup, err := az.nsgCache.Get(az.SecurityGroupName)
+	if err != nil {
+		return nsg, err
+	}
+
+	if securityGroup == nil {
+		return nsg, fmt.Errorf("nsg %q not found", az.SecurityGroupName)
+	}
+
+	return *(securityGroup.(*network.SecurityGroup)), nil
+}
+
+func (az *Cloud) newVMCache() (*timedCache, error) {
+	getter := func(key string) (interface{}, error) {
+		// Currently InstanceView request are used by azure_zones, while the calls come after non-InstanceView
+		// request. If we first send an InstanceView request and then a non InstanceView request, the second
+		// request will still hit throttling. This is what happens now for cloud controller manager: In this
+		// case we do get instance view every time to fulfill the azure_zones requirement without hitting
+		// throttling.
+		// Consider adding separate parameter for controlling 'InstanceView' once node update issue #56276 is fixed
+		vm, err := az.VirtualMachinesClient.Get(az.ResourceGroup, key, compute.InstanceView)
+		exists, realErr := checkResourceExistsFromError(err)
+		if realErr != nil {
+			return nil, realErr
+		}
+
+		if !exists {
+			return nil, nil
+		}
+
+		return &vm, nil
+	}
+
+	return newTimedcache(vmCacheTTL, getter)
+}
+
+func (az *Cloud) newLBCache() (*timedCache, error) {
+	getter := func(key string) (interface{}, error) {
+		lb, err := az.LoadBalancerClient.Get(az.ResourceGroup, key, "")
+		exists, realErr := checkResourceExistsFromError(err)
+		if realErr != nil {
+			return nil, realErr
+		}
+
+		if !exists {
+			return nil, nil
+		}
+
+		return &lb, nil
+	}
+
+	return newTimedcache(lbCacheTTL, getter)
+}
+
+func (az *Cloud) newNSGCache() (*timedCache, error) {
+	getter := func(key string) (interface{}, error) {
+		nsg, err := az.SecurityGroupsClient.Get(az.ResourceGroup, key, "")
+		exists, realErr := checkResourceExistsFromError(err)
+		if realErr != nil {
+			return nil, realErr
+		}
+
+		if !exists {
+			return nil, nil
+		}
+
+		return &nsg, nil
+	}
+
+	return newTimedcache(nsgCacheTTL, getter)
+}
+
+func (az *Cloud) newRouteTableCache() (*timedCache, error) {
+	getter := func(key string) (interface{}, error) {
+		rt, err := az.RouteTablesClient.Get(az.ResourceGroup, key, "")
+		exists, realErr := checkResourceExistsFromError(err)
+		if realErr != nil {
+			return nil, realErr
+		}
+
+		if !exists {
+			return nil, nil
+		}
+
+		return &rt, nil
+	}
+
+	return newTimedcache(rtCacheTTL, getter)
 }
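
All four constructors above follow one recipe: wrap the resource's ARM GET in a getter that propagates real errors, maps a 404 to `(nil, nil)`, and hands the result to `newTimedcache`. Sketched once more for a hypothetical fifth resource so the shape is explicit; `newFooCache` and the injected `get` function are illustrative, while `checkResourceExistsFromError` and `newTimedcache` are the helpers from the diffs above:

```go
// newFooCache is a hypothetical constructor following the recipe above.
// The `get` parameter stands in for any ARM GET call; it is not a real client.
func (az *Cloud) newFooCache(get func(resourceGroup, name string) (interface{}, error)) (*timedCache, error) {
	getter := func(key string) (interface{}, error) {
		obj, err := get(az.ResourceGroup, key)
		exists, realErr := checkResourceExistsFromError(err)
		if realErr != nil {
			return nil, realErr // real failure: propagate, cache nothing
		}
		if !exists {
			return nil, nil // 404: report absence as nil data
		}
		return obj, nil
	}

	return newTimedcache(2*time.Minute, getter)
}
```

Because `timedCache.Get` re-runs the getter whenever an entry's data is nil, only resources that exist are actually served from the cache for the TTL; missing resources are re-queried on every read.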