mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Fix ECR provider startup latency
* Before this change, even on non-AWS platforms, the Enabled() check attempts to make calls to the metadata endpoint when the session and credentials are initialized (in order to determine if the provider should be initialized at all). * This can cause latency because the SDK times out and retries -- up to 20 seconds of latency has been observed on non-AWS platforms when the metadata IP was blocked with an iptables rule. * Instead, check once if we are running on an EC2 platform, first trying to find the EC2 UUID in system files, and second attempting to get credentials. * Add a benchmark test that includes intialization and the credential check.
This commit is contained in:
parent
23d4b3b4f0
commit
d422a92e66
@ -20,7 +20,9 @@ import (
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -38,13 +40,19 @@ import (
|
||||
"k8s.io/kubernetes/pkg/credentialprovider"
|
||||
)
|
||||
|
||||
var ecrPattern = regexp.MustCompile(`^(\d{12})\.dkr\.ecr(\-fips)?\.([a-zA-Z0-9][a-zA-Z0-9-_]*)\.amazonaws\.com(\.cn)?$`)
|
||||
var (
|
||||
ecrPattern = regexp.MustCompile(`^(\d{12})\.dkr\.ecr(\-fips)?\.([a-zA-Z0-9][a-zA-Z0-9-_]*)\.amazonaws\.com(\.cn)?$`)
|
||||
once sync.Once
|
||||
isEC2 bool
|
||||
)
|
||||
|
||||
// init registers a credential provider for each registryURLTemplate and creates
|
||||
// an ECR token getter factory with a new cache to store token getters
|
||||
func init() {
|
||||
credentialprovider.RegisterCredentialProvider("amazon-ecr",
|
||||
newECRProvider(&ecrTokenGetterFactory{cache: make(map[string]tokenGetter)}))
|
||||
newECRProvider(&ecrTokenGetterFactory{cache: make(map[string]tokenGetter)},
|
||||
ec2ValidationImpl,
|
||||
))
|
||||
}
|
||||
|
||||
// ecrProvider is a DockerConfigProvider that gets and refreshes tokens
|
||||
@ -52,20 +60,74 @@ func init() {
|
||||
type ecrProvider struct {
|
||||
cache cache.Store
|
||||
getterFactory tokenGetterFactory
|
||||
isEC2 ec2ValidationFunc
|
||||
}
|
||||
|
||||
var _ credentialprovider.DockerConfigProvider = &ecrProvider{}
|
||||
|
||||
func newECRProvider(getterFactory tokenGetterFactory) *ecrProvider {
|
||||
func newECRProvider(getterFactory tokenGetterFactory, isEC2 ec2ValidationFunc) *ecrProvider {
|
||||
return &ecrProvider{
|
||||
cache: cache.NewExpirationStore(stringKeyFunc, &ecrExpirationPolicy{}),
|
||||
getterFactory: getterFactory,
|
||||
isEC2: isEC2,
|
||||
}
|
||||
}
|
||||
|
||||
// Enabled implements DockerConfigProvider.Enabled. Enabled is true if AWS
|
||||
// credentials are found.
|
||||
// Enabled implements DockerConfigProvider.Enabled.
|
||||
func (p *ecrProvider) Enabled() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
type ec2ValidationFunc func() bool
|
||||
|
||||
// ec2ValidationImpl returns true if we detect
|
||||
// an EC2 vm based on checking for the EC2 system UUID, the asset tag (for nitro
|
||||
// instances), or instance credentials if the UUID is not present.
|
||||
// Ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/identify_ec2_instances.html
|
||||
func ec2ValidationImpl() bool {
|
||||
return tryValidateEC2UUID() || tryValidateEC2Creds()
|
||||
}
|
||||
|
||||
func tryValidateEC2UUID() bool {
|
||||
hypervisor_uuid := "/sys/hypervisor/uuid"
|
||||
product_uuid := "/sys/devices/virtual/dmi/id/product_uuid"
|
||||
asset_tag := "/sys/devices/virtual/dmi/id/board_asset_tag"
|
||||
|
||||
if _, err := os.Stat(hypervisor_uuid); err == nil {
|
||||
b, err := ioutil.ReadFile(hypervisor_uuid)
|
||||
if err != nil {
|
||||
klog.Errorf("error checking if this is an EC2 instance: %v", err)
|
||||
} else if strings.HasPrefix(string(b), "EC2") || strings.HasPrefix(string(b), "ec2") {
|
||||
klog.V(5).Infof("found 'ec2' in uuid %v from %v, enabling legacy AWS credential provider", string(b), hypervisor_uuid)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := os.Stat(product_uuid); err == nil {
|
||||
b, err := ioutil.ReadFile(product_uuid)
|
||||
if err != nil {
|
||||
klog.Errorf("error checking if this is an EC2 instance: %v", err)
|
||||
} else if strings.HasPrefix(string(b), "EC2") || strings.HasPrefix(string(b), "ec2") {
|
||||
klog.V(5).Infof("found 'ec2' in uuid %v from %v, enabling legacy AWS credential provider", string(b), product_uuid)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := os.Stat(asset_tag); err == nil {
|
||||
b, err := ioutil.ReadFile(asset_tag)
|
||||
s := strings.TrimSpace(string(b))
|
||||
if err != nil {
|
||||
klog.Errorf("error checking if this is an EC2 instance: %v", err)
|
||||
} else if strings.HasPrefix(s, "i-") && len(s) == 19 {
|
||||
// Instance ID's are 19 characters plus newline
|
||||
klog.V(5).Infof("found instance ID in %v from %v, enabling legacy AWS credential provider", string(b), asset_tag)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func tryValidateEC2Creds() bool {
|
||||
sess, err := session.NewSessionWithOptions(session.Options{
|
||||
SharedConfigState: session.SharedConfigEnable,
|
||||
})
|
||||
@ -77,6 +139,7 @@ func (p *ecrProvider) Enabled() bool {
|
||||
klog.Errorf("while getting AWS credentials %v", err)
|
||||
return false
|
||||
}
|
||||
klog.V(5).Infof("found aws credentials, enabling legacy AWS credential provider")
|
||||
return true
|
||||
}
|
||||
|
||||
@ -85,12 +148,25 @@ func (p *ecrProvider) Enabled() bool {
|
||||
func (p *ecrProvider) Provide(image string) credentialprovider.DockerConfig {
|
||||
parsed, err := parseRepoURL(image)
|
||||
if err != nil {
|
||||
klog.V(3).Info(err)
|
||||
return credentialprovider.DockerConfig{}
|
||||
}
|
||||
|
||||
// To prevent the AWS SDK from causing latency on non-aws platforms, only test if we are on
|
||||
// EC2 or have access to credentials once. Attempt to do it without network calls by checking
|
||||
// for certain EC2-specific files. Otherwise, we ask the SDK to initialize a session to see if
|
||||
// credentials are available. On non-aws platforms, especially when a metadata endpoint is blocked,
|
||||
// this has been shown to cause 20 seconds of latency due to SDK retries
|
||||
// (see https://github.com/kubernetes/kubernetes/issues/92162)
|
||||
once.Do(func() {
|
||||
isEC2 = p.isEC2()
|
||||
})
|
||||
|
||||
if !isEC2 {
|
||||
return credentialprovider.DockerConfig{}
|
||||
}
|
||||
|
||||
if cfg, exists := p.getFromCache(parsed); exists {
|
||||
klog.V(6).Infof("Got ECR credentials from cache for %s", parsed.registry)
|
||||
klog.V(3).Infof("Got ECR credentials from cache for %s", parsed.registry)
|
||||
return cfg
|
||||
}
|
||||
klog.V(3).Info("unable to get ECR credentials from cache, checking ECR API")
|
||||
|
@ -154,6 +154,10 @@ func TestParseRepoURLFail(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func isAlwaysEC2() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func TestECRProvide(t *testing.T) {
|
||||
registry := "123456789012.dkr.ecr.lala-land-1.amazonaws.com"
|
||||
otherRegistries := []string{
|
||||
@ -168,7 +172,7 @@ func TestECRProvide(t *testing.T) {
|
||||
password: password,
|
||||
endpoint: registry,
|
||||
},
|
||||
})
|
||||
}, isAlwaysEC2)
|
||||
keyring := &credentialprovider.BasicDockerKeyring{}
|
||||
keyring.Add(p.Provide(image))
|
||||
|
||||
@ -213,7 +217,7 @@ func TestECRProvideCached(t *testing.T) {
|
||||
endpoint: registry,
|
||||
randomizePassword: true,
|
||||
},
|
||||
})
|
||||
}, isAlwaysEC2)
|
||||
image1 := path.Join(registry, "foo/bar")
|
||||
image2 := path.Join(registry, "bar/baz")
|
||||
keyring := &credentialprovider.BasicDockerKeyring{}
|
||||
@ -266,7 +270,7 @@ func TestChinaECRProvide(t *testing.T) {
|
||||
password: password,
|
||||
endpoint: registry,
|
||||
},
|
||||
})
|
||||
}, isAlwaysEC2)
|
||||
keyring := &credentialprovider.BasicDockerKeyring{}
|
||||
keyring.Add(p.Provide(image))
|
||||
// Verify that we get the expected username/password combo for
|
||||
@ -310,7 +314,7 @@ func TestChinaECRProvideCached(t *testing.T) {
|
||||
endpoint: registry,
|
||||
randomizePassword: true,
|
||||
},
|
||||
})
|
||||
}, isAlwaysEC2)
|
||||
image := path.Join(registry, "foo/bar")
|
||||
keyring := &credentialprovider.BasicDockerKeyring{}
|
||||
keyring.Add(p.Provide(image))
|
||||
@ -331,3 +335,8 @@ func TestChinaECRProvideCached(t *testing.T) {
|
||||
t.Errorf("cached credentials do not match")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSetupLatency(b *testing.B) {
|
||||
p := newECRProvider(&ecrTokenGetterFactory{cache: make(map[string]tokenGetter)}, ec2ValidationImpl)
|
||||
_ = p.Enabled()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user