Merge pull request #55833 from feiskyer/azure-vmss

Automatic merge from submit-queue (batch tested with PRs 56128, 56004, 56083, 55833, 56042). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Add initial Virtual Machine Scale Sets (VMSS) support in Azure

**What this PR does / why we need it**:

This is the first step of adding Virtual Machine Scale Sets (VMSS) support in Azure, it

- Adds  vmType params to support both vmss and standard in Azure
- Adds initial InstanceID/InstanceType/IP/Routes support for vmss instances
- Master nodes may not belong to any scale sets, so it falls back to VirtualMachinesClient for such instances

Have validated that nodes could be registered and pods could be scheduled and run correctly.

Still more work to do to fully support Azure VMSS. And next steps are tracking at #43287.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

Part of #43287.

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
This commit is contained in:
Kubernetes Submit Queue 2017-11-21 17:04:59 -08:00 committed by GitHub
commit c975b13869
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 335 additions and 1 deletions

View File

@ -23,6 +23,7 @@ go_library(
"azure_storage.go",
"azure_storageaccount.go",
"azure_util.go",
"azure_util_vmss.go",
"azure_wrap.go",
"azure_zones.go",
],
@ -57,7 +58,10 @@ go_library(
go_test(
name = "go_default_test",
srcs = ["azure_test.go"],
srcs = [
"azure_test.go",
"azure_util_test.go",
],
importpath = "k8s.io/kubernetes/pkg/cloudprovider/providers/azure",
library = ":go_default_library",
deps = [
@ -66,6 +70,7 @@ go_test(
"//vendor/github.com/Azure/azure-sdk-for-go/arm/compute:go_default_library",
"//vendor/github.com/Azure/azure-sdk-for-go/arm/network:go_default_library",
"//vendor/github.com/Azure/go-autorest/autorest/to:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",

View File

@ -22,6 +22,7 @@ import (
"fmt"
"io"
"io/ioutil"
"strings"
"time"
"k8s.io/client-go/util/flowcontrol"
@ -52,6 +53,9 @@ const (
backoffDurationDefault = 5 // in seconds
backoffJitterDefault = 1.0
maximumLoadBalancerRuleCount = 148 // According to Azure LB rule default limit
vmTypeVMSS = "vmss"
vmTypeStandard = "standard"
)
// Config holds the configuration parsed from the --cloud-config flag
@ -83,6 +87,15 @@ type Config struct {
// the cloudprovider will try to add all nodes to a single backend pool which is forbidden.
// In other words, if you use multiple agent pools (availability sets), you MUST set this field.
PrimaryAvailabilitySetName string `json:"primaryAvailabilitySetName" yaml:"primaryAvailabilitySetName"`
// The type of azure nodes. Candidate valudes are: vmss and standard.
// If not set, it will be default to standard.
VMType string `json:"vmType" yaml:"vmType"`
// The name of the scale set that should be used as the load balancer backend.
// If this is set, the Azure cloudprovider will only add nodes from that scale set to the load
// balancer backend pool. If this is not set, and multiple agent pools (scale sets) are used, then
// the cloudprovider will try to add all nodes to a single backend pool which is forbidden.
// In other words, if you use multiple agent pools (scale sets), you MUST set this field.
PrimaryScaleSetName string `json:"primaryScaleSetName" yaml:"primaryScaleSetName"`
// The ClientID for an AAD application with RBAC access to talk to Azure RM APIs
AADClientID string `json:"aadClientId" yaml:"aadClientId"`
@ -131,6 +144,7 @@ type VirtualMachinesClient interface {
type InterfacesClient interface {
CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error)
Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error)
GetVirtualMachineScaleSetNetworkInterface(resourceGroupName string, virtualMachineScaleSetName string, virtualmachineIndex string, networkInterfaceName string, expand string) (result network.Interface, err error)
}
// LoadBalancersClient defines needed functions for azure network.LoadBalancersClient
@ -185,6 +199,10 @@ type Cloud struct {
resourceRequestBackoff wait.Backoff
metadata *InstanceMetadata
// Clients for vmss.
VirtualMachineScaleSetsClient compute.VirtualMachineScaleSetsClient
VirtualMachineScaleSetVMsClient compute.VirtualMachineScaleSetVMsClient
*BlobDiskController
*ManagedDiskController
*controllerCommon
@ -327,6 +345,20 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
configureUserAgent(&securityGroupsClient.Client)
az.SecurityGroupsClient = securityGroupsClient
virtualMachineScaleSetVMsClient := compute.NewVirtualMachineScaleSetVMsClient(az.SubscriptionID)
az.VirtualMachineScaleSetVMsClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.VirtualMachineScaleSetVMsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.VirtualMachineScaleSetVMsClient.PollingDelay = 5 * time.Second
configureUserAgent(&virtualMachineScaleSetVMsClient.Client)
az.VirtualMachineScaleSetVMsClient = virtualMachineScaleSetVMsClient
virtualMachineScaleSetsClient := compute.NewVirtualMachineScaleSetsClient(az.SubscriptionID)
az.VirtualMachineScaleSetsClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.VirtualMachineScaleSetsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.VirtualMachineScaleSetsClient.PollingDelay = 5 * time.Second
configureUserAgent(&virtualMachineScaleSetsClient.Client)
az.VirtualMachineScaleSetsClient = virtualMachineScaleSetsClient
az.StorageAccountClient = storage.NewAccountsClientWithBaseURI(az.Environment.ResourceManagerEndpoint, az.SubscriptionID)
az.StorageAccountClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
configureUserAgent(&az.StorageAccountClient.Client)
@ -421,6 +453,11 @@ func ParseConfig(configReader io.Reader) (*Config, *azure.Environment, error) {
return nil, nil, err
}
}
if config.VMType != "" {
config.VMType = strings.ToLower(config.VMType)
}
return &config, &env, nil
}

View File

@ -58,6 +58,23 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
return machine, exists, err
}
// GetScaleSetsVMWithRetry invokes az.getScaleSetsVM with exponential backoff retry
func (az *Cloud) GetScaleSetsVMWithRetry(name types.NodeName) (compute.VirtualMachineScaleSetVM, bool, error) {
var machine compute.VirtualMachineScaleSetVM
var exists bool
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
var retryErr error
machine, exists, retryErr = az.getVmssVirtualMachine(name)
if retryErr != nil {
glog.Errorf("GetScaleSetsVMWithRetry backoff: failure, will retry,err=%v", retryErr)
return false, nil
}
glog.V(10).Infof("GetScaleSetsVMWithRetry backoff: success")
return true, nil
})
return machine, exists, err
}
// VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry
func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) {
var machine compute.VirtualMachine

View File

@ -339,6 +339,10 @@ func (fIC fakeAzureInterfacesClient) Get(resourceGroupName string, networkInterf
}
}
func (fIC fakeAzureInterfacesClient) GetVirtualMachineScaleSetNetworkInterface(resourceGroupName string, virtualMachineScaleSetName string, virtualmachineIndex string, networkInterfaceName string, expand string) (result network.Interface, err error) {
return result, nil
}
type fakeAzureVirtualMachinesClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]compute.VirtualMachine

View File

@ -117,6 +117,44 @@ func (az *Cloud) InstanceID(name types.NodeName) (string, error) {
}
}
}
if az.Config.VMType == vmTypeVMSS {
id, err := az.getVmssInstanceID(name)
if err == cloudprovider.InstanceNotFound || err == ErrorNotVmssInstance {
// Retry with standard type because master nodes may not belong to any vmss.
return az.getStandardInstanceID(name)
}
return id, err
}
return az.getStandardInstanceID(name)
}
func (az *Cloud) getVmssInstanceID(name types.NodeName) (string, error) {
var machine compute.VirtualMachineScaleSetVM
var exists bool
var err error
az.operationPollRateLimiter.Accept()
machine, exists, err = az.getVmssVirtualMachine(name)
if err != nil {
if az.CloudProviderBackoff {
glog.V(2).Infof("InstanceID(%s) backing off", name)
machine, exists, err = az.GetScaleSetsVMWithRetry(name)
if err != nil {
glog.V(2).Infof("InstanceID(%s) abort backoff", name)
return "", err
}
} else {
return "", err
}
} else if !exists {
return "", cloudprovider.InstanceNotFound
}
return *machine.ID, nil
}
func (az *Cloud) getStandardInstanceID(name types.NodeName) (string, error) {
var machine compute.VirtualMachine
var exists bool
var err error
@ -168,6 +206,39 @@ func (az *Cloud) InstanceType(name types.NodeName) (string, error) {
}
}
}
if az.Config.VMType == vmTypeVMSS {
machineType, err := az.getVmssInstanceType(name)
if err == cloudprovider.InstanceNotFound || err == ErrorNotVmssInstance {
// Retry with standard type because master nodes may not belong to any vmss.
return az.getStandardInstanceType(name)
}
return machineType, err
}
return az.getStandardInstanceType(name)
}
// getVmssInstanceType gets instance with type vmss.
func (az *Cloud) getVmssInstanceType(name types.NodeName) (string, error) {
machine, exists, err := az.getVmssVirtualMachine(name)
if err != nil {
glog.Errorf("error: az.InstanceType(%s), az.getVmssVirtualMachine(%s) err=%v", name, name, err)
return "", err
} else if !exists {
return "", cloudprovider.InstanceNotFound
}
if machine.Sku.Name != nil {
return *machine.Sku.Name, nil
}
return "", fmt.Errorf("instance type is not set")
}
// getStandardInstanceType gets instance with standard type.
func (az *Cloud) getStandardInstanceType(name types.NodeName) (string, error) {
machine, exists, err := az.getVirtualMachine(name)
if err != nil {
glog.Errorf("error: az.InstanceType(%s), az.getVirtualMachine(%s) err=%v", name, name, err)

View File

@ -402,6 +402,19 @@ outer:
}
func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, error) {
if az.Config.VMType == vmTypeVMSS {
ip, err := az.getIPForVmssMachine(nodeName)
if err == cloudprovider.InstanceNotFound || err == ErrorNotVmssInstance {
return az.getIPForStandardMachine(nodeName)
}
return ip, err
}
return az.getIPForStandardMachine(nodeName)
}
func (az *Cloud) getIPForStandardMachine(nodeName types.NodeName) (string, error) {
az.operationPollRateLimiter.Accept()
machine, exists, err := az.getVirtualMachine(nodeName)
if !exists {

View File

@ -0,0 +1,53 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package azure
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
)
func TestGetVmssInstanceID(t *testing.T) {
tests := []struct {
msg string
machineName string
expectError bool
expectedInstanceID string
}{{
msg: "invalid vmss instance name",
machineName: "vmvm",
expectError: true,
},
{
msg: "valid vmss instance name",
machineName: "vm00000Z",
expectError: false,
expectedInstanceID: "35",
},
}
for i, test := range tests {
instanceID, err := getVmssInstanceID(test.machineName)
if test.expectError {
assert.Error(t, err, fmt.Sprintf("TestCase[%d]: %s", i, test.msg))
} else {
assert.Equal(t, test.expectedInstanceID, instanceID, fmt.Sprintf("TestCase[%d]: %s", i, test.msg))
}
}
}

View File

@ -0,0 +1,102 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package azure
import (
"fmt"
"strconv"
"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/types"
"k8s.io/kubernetes/pkg/cloudprovider"
)
func (az *Cloud) getIPForVmssMachine(nodeName types.NodeName) (string, error) {
az.operationPollRateLimiter.Accept()
machine, exists, err := az.getVmssVirtualMachine(nodeName)
if !exists {
return "", cloudprovider.InstanceNotFound
}
if err != nil {
glog.Errorf("error: az.getIPForVmssMachine(%s), az.getVmssVirtualMachine(%s), err=%v", nodeName, nodeName, err)
return "", err
}
nicID, err := getPrimaryInterfaceIDForVmssMachine(machine)
if err != nil {
glog.Errorf("error: az.getIPForVmssMachine(%s), getPrimaryInterfaceID(%v), err=%v", nodeName, machine, err)
return "", err
}
nicName, err := getLastSegment(nicID)
if err != nil {
glog.Errorf("error: az.getIPForVmssMachine(%s), getLastSegment(%s), err=%v", nodeName, nicID, err)
return "", err
}
az.operationPollRateLimiter.Accept()
glog.V(10).Infof("InterfacesClient.Get(%q): start", nicName)
nic, err := az.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface(az.ResourceGroup, az.Config.PrimaryScaleSetName, *machine.InstanceID, nicName, "")
glog.V(10).Infof("InterfacesClient.Get(%q): end", nicName)
if err != nil {
glog.Errorf("error: az.getIPForVmssMachine(%s), az.GetVirtualMachineScaleSetNetworkInterface.Get(%s, %s, %s), err=%v", nodeName, az.ResourceGroup, nicName, "", err)
return "", err
}
ipConfig, err := getPrimaryIPConfig(nic)
if err != nil {
glog.Errorf("error: az.getIPForVmssMachine(%s), getPrimaryIPConfig(%v), err=%v", nodeName, nic, err)
return "", err
}
targetIP := *ipConfig.PrivateIPAddress
return targetIP, nil
}
// This returns the full identifier of the primary NIC for the given VM.
func getPrimaryInterfaceIDForVmssMachine(machine compute.VirtualMachineScaleSetVM) (string, error) {
if len(*machine.NetworkProfile.NetworkInterfaces) == 1 {
return *(*machine.NetworkProfile.NetworkInterfaces)[0].ID, nil
}
for _, ref := range *machine.NetworkProfile.NetworkInterfaces {
if *ref.Primary {
return *ref.ID, nil
}
}
return "", fmt.Errorf("failed to find a primary nic for the vm. vmname=%q", *machine.Name)
}
// machineName is composed of computerNamePrefix and 36-based instanceID.
// And instanceID part if in fixed length of 6 characters.
// Refer https://msftstack.wordpress.com/2017/05/10/figuring-out-azure-vm-scale-set-machine-names/.
func getVmssInstanceID(machineName string) (string, error) {
nameLength := len(machineName)
if nameLength < 6 {
return "", ErrorNotVmssInstance
}
instanceID, err := strconv.ParseUint(machineName[nameLength-6:], 36, 64)
if err != nil {
return "", ErrorNotVmssInstance
}
return fmt.Sprintf("%d", instanceID), nil
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package azure
import (
"errors"
"net/http"
"github.com/Azure/azure-sdk-for-go/arm/compute"
@ -26,6 +27,11 @@ import (
"k8s.io/apimachinery/pkg/types"
)
var (
// ErrorNotVmssInstance indicates an instance is not belongint to any vmss.
ErrorNotVmssInstance = errors.New("not a vmss instance")
)
// checkExistsFromError inspects an error and returns a true if err is nil,
// false if error is an autorest.Error with StatusCode=404 and will return the
// error back if error is another status code or another type of error.
@ -74,6 +80,32 @@ func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualM
return vm, exists, err
}
func (az *Cloud) getVmssVirtualMachine(nodeName types.NodeName) (vm compute.VirtualMachineScaleSetVM, exists bool, err error) {
var realErr error
vmName := string(nodeName)
instanceID, err := getVmssInstanceID(vmName)
if err != nil {
return vm, false, err
}
az.operationPollRateLimiter.Accept()
glog.V(10).Infof("VirtualMachineScaleSetVMsClient.Get(%s): start", vmName)
vm, err = az.VirtualMachineScaleSetVMsClient.Get(az.ResourceGroup, az.PrimaryScaleSetName, instanceID)
glog.V(10).Infof("VirtualMachineScaleSetVMsClient.Get(%s): end", vmName)
exists, realErr = checkResourceExistsFromError(err)
if realErr != nil {
return vm, false, realErr
}
if !exists {
return vm, false, nil
}
return vm, exists, err
}
func (az *Cloud) getRouteTable() (routeTable network.RouteTable, exists bool, err error) {
var realErr error