Merge pull request #93260 from nckturner/return-true-for-enabled-ecr-provider

Fix ECR provider startup latency
This commit is contained in:
Kubernetes Prow Robot 2021-04-08 14:26:55 -07:00 committed by GitHub
commit 4c9e55a44f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 96 additions and 11 deletions

View File

@ -20,7 +20,9 @@ import (
"encoding/base64"
"errors"
"fmt"
"io/ioutil"
"net/url"
"os"
"regexp"
"strings"
"sync"
@ -38,13 +40,19 @@ import (
"k8s.io/kubernetes/pkg/credentialprovider"
)
var ecrPattern = regexp.MustCompile(`^(\d{12})\.dkr\.ecr(\-fips)?\.([a-zA-Z0-9][a-zA-Z0-9-_]*)\.amazonaws\.com(\.cn)?$`)
var (
ecrPattern = regexp.MustCompile(`^(\d{12})\.dkr\.ecr(\-fips)?\.([a-zA-Z0-9][a-zA-Z0-9-_]*)\.amazonaws\.com(\.cn)?$`)
once sync.Once
isEC2 bool
)
// init registers a credential provider for each registryURLTemplate and creates
// an ECR token getter factory with a new cache to store token getters
func init() {
credentialprovider.RegisterCredentialProvider("amazon-ecr",
newECRProvider(&ecrTokenGetterFactory{cache: make(map[string]tokenGetter)}))
newECRProvider(&ecrTokenGetterFactory{cache: make(map[string]tokenGetter)},
ec2ValidationImpl,
))
}
// ecrProvider is a DockerConfigProvider that gets and refreshes tokens
@ -52,20 +60,74 @@ func init() {
type ecrProvider struct {
cache cache.Store
getterFactory tokenGetterFactory
isEC2 ec2ValidationFunc
}
var _ credentialprovider.DockerConfigProvider = &ecrProvider{}
func newECRProvider(getterFactory tokenGetterFactory) *ecrProvider {
func newECRProvider(getterFactory tokenGetterFactory, isEC2 ec2ValidationFunc) *ecrProvider {
return &ecrProvider{
cache: cache.NewExpirationStore(stringKeyFunc, &ecrExpirationPolicy{}),
getterFactory: getterFactory,
isEC2: isEC2,
}
}
// Enabled implements DockerConfigProvider.Enabled. Enabled is true if AWS
// credentials are found.
// Enabled implements DockerConfigProvider.Enabled.
func (p *ecrProvider) Enabled() bool {
return true
}
type ec2ValidationFunc func() bool
// ec2ValidationImpl returns true if we detect
// an EC2 vm based on checking for the EC2 system UUID, the asset tag (for nitro
// instances), or instance credentials if the UUID is not present.
// Ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/identify_ec2_instances.html
func ec2ValidationImpl() bool {
return tryValidateEC2UUID() || tryValidateEC2Creds()
}
func tryValidateEC2UUID() bool {
hypervisor_uuid := "/sys/hypervisor/uuid"
product_uuid := "/sys/devices/virtual/dmi/id/product_uuid"
asset_tag := "/sys/devices/virtual/dmi/id/board_asset_tag"
if _, err := os.Stat(hypervisor_uuid); err == nil {
b, err := ioutil.ReadFile(hypervisor_uuid)
if err != nil {
klog.Errorf("error checking if this is an EC2 instance: %v", err)
} else if strings.HasPrefix(string(b), "EC2") || strings.HasPrefix(string(b), "ec2") {
klog.V(5).Infof("found 'ec2' in uuid %v from %v, enabling legacy AWS credential provider", string(b), hypervisor_uuid)
return true
}
}
if _, err := os.Stat(product_uuid); err == nil {
b, err := ioutil.ReadFile(product_uuid)
if err != nil {
klog.Errorf("error checking if this is an EC2 instance: %v", err)
} else if strings.HasPrefix(string(b), "EC2") || strings.HasPrefix(string(b), "ec2") {
klog.V(5).Infof("found 'ec2' in uuid %v from %v, enabling legacy AWS credential provider", string(b), product_uuid)
return true
}
}
if _, err := os.Stat(asset_tag); err == nil {
b, err := ioutil.ReadFile(asset_tag)
s := strings.TrimSpace(string(b))
if err != nil {
klog.Errorf("error checking if this is an EC2 instance: %v", err)
} else if strings.HasPrefix(s, "i-") && len(s) == 19 {
// Instance ID's are 19 characters plus newline
klog.V(5).Infof("found instance ID in %v from %v, enabling legacy AWS credential provider", string(b), asset_tag)
return true
}
}
return false
}
func tryValidateEC2Creds() bool {
sess, err := session.NewSessionWithOptions(session.Options{
SharedConfigState: session.SharedConfigEnable,
})
@ -77,6 +139,7 @@ func (p *ecrProvider) Enabled() bool {
klog.Errorf("while getting AWS credentials %v", err)
return false
}
klog.V(5).Infof("found aws credentials, enabling legacy AWS credential provider")
return true
}
@ -85,12 +148,25 @@ func (p *ecrProvider) Enabled() bool {
func (p *ecrProvider) Provide(image string) credentialprovider.DockerConfig {
parsed, err := parseRepoURL(image)
if err != nil {
klog.V(3).Info(err)
return credentialprovider.DockerConfig{}
}
// To prevent the AWS SDK from causing latency on non-aws platforms, only test if we are on
// EC2 or have access to credentials once. Attempt to do it without network calls by checking
// for certain EC2-specific files. Otherwise, we ask the SDK to initialize a session to see if
// credentials are available. On non-aws platforms, especially when a metadata endpoint is blocked,
// this has been shown to cause 20 seconds of latency due to SDK retries
// (see https://github.com/kubernetes/kubernetes/issues/92162)
once.Do(func() {
isEC2 = p.isEC2()
})
if !isEC2 {
return credentialprovider.DockerConfig{}
}
if cfg, exists := p.getFromCache(parsed); exists {
klog.V(6).Infof("Got ECR credentials from cache for %s", parsed.registry)
klog.V(3).Infof("Got ECR credentials from cache for %s", parsed.registry)
return cfg
}
klog.V(3).Info("unable to get ECR credentials from cache, checking ECR API")

View File

@ -154,6 +154,10 @@ func TestParseRepoURLFail(t *testing.T) {
}
}
func isAlwaysEC2() bool {
return true
}
func TestECRProvide(t *testing.T) {
registry := "123456789012.dkr.ecr.lala-land-1.amazonaws.com"
otherRegistries := []string{
@ -168,7 +172,7 @@ func TestECRProvide(t *testing.T) {
password: password,
endpoint: registry,
},
})
}, isAlwaysEC2)
keyring := &credentialprovider.BasicDockerKeyring{}
keyring.Add(p.Provide(image))
@ -213,7 +217,7 @@ func TestECRProvideCached(t *testing.T) {
endpoint: registry,
randomizePassword: true,
},
})
}, isAlwaysEC2)
image1 := path.Join(registry, "foo/bar")
image2 := path.Join(registry, "bar/baz")
keyring := &credentialprovider.BasicDockerKeyring{}
@ -266,7 +270,7 @@ func TestChinaECRProvide(t *testing.T) {
password: password,
endpoint: registry,
},
})
}, isAlwaysEC2)
keyring := &credentialprovider.BasicDockerKeyring{}
keyring.Add(p.Provide(image))
// Verify that we get the expected username/password combo for
@ -310,7 +314,7 @@ func TestChinaECRProvideCached(t *testing.T) {
endpoint: registry,
randomizePassword: true,
},
})
}, isAlwaysEC2)
image := path.Join(registry, "foo/bar")
keyring := &credentialprovider.BasicDockerKeyring{}
keyring.Add(p.Provide(image))
@ -331,3 +335,8 @@ func TestChinaECRProvideCached(t *testing.T) {
t.Errorf("cached credentials do not match")
}
}
func BenchmarkSetupLatency(b *testing.B) {
p := newECRProvider(&ecrTokenGetterFactory{cache: make(map[string]tokenGetter)}, ec2ValidationImpl)
_ = p.Enabled()
}