Merge pull request #32719 from liggitt/namespace-admission-wait

Automatic merge from submit-queue

compensate for raft/cache delay in namespace admission

When attempting to create an object in a just-created namespace, the admission plugin can reject the creation attempt for two reasons:

1. The cache of existing namespaces has not observed the creation watch event yet. There was already compensation to perform a live lookup if the namespace was not found in the cache.

2. The live lookup can be performed against a non-leader in a HA etcd setup, and can return NotFound if the non-leader etcd has not observed the creation.

This PR adds a delay, then rechecks the cache before proceeding with the live lookup. This gives both the cache and the backing storage more time to observe the create, before failing the request.
This commit is contained in:
Kubernetes Submit Queue 2016-09-20 22:58:32 -07:00 committed by GitHub
commit c4e5782a9d

View File

@ -21,6 +21,7 @@ import (
"io"
"time"
"github.com/golang/glog"
lru "github.com/hashicorp/golang-lru"
"k8s.io/kubernetes/pkg/client/cache"
@ -38,6 +39,12 @@ const (
PluginName = "NamespaceLifecycle"
// how long a namespace stays in the force live lookup cache before expiration.
forceLiveLookupTTL = 30 * time.Second
// how long to wait for a missing namespace before re-checking the cache (and then doing a live lookup)
// this accomplishes two things:
// 1. It allows a watch-fed cache time to observe a namespace creation event
// 2. It allows time for a namespace creation to distribute to members of a storage cluster,
// so the live lookup has a better chance of succeeding even if it isn't performed against the leader.
missingNamespaceWait = 50 * time.Millisecond
)
func init() {
@ -113,6 +120,19 @@ func (l *lifecycle) Admit(a admission.Attributes) error {
return errors.NewInternalError(err)
}
if !exists && a.GetOperation() == admission.Create {
// give the cache time to observe the namespace before rejecting a create.
// this helps when creating a namespace and immediately creating objects within it.
time.Sleep(missingNamespaceWait)
namespaceObj, exists, err = l.namespaceInformer.GetStore().Get(key)
if err != nil {
return errors.NewInternalError(err)
}
if exists {
glog.V(4).Infof("found %s in cache after waiting", a.GetNamespace())
}
}
// forceLiveLookup if true will skip looking at local cache state and instead always make a live call to server.
forceLiveLookup := false
lruItemObj, ok := l.forceLiveLookupCache.Get(a.GetNamespace())
@ -123,7 +143,7 @@ func (l *lifecycle) Admit(a admission.Attributes) error {
// refuse to operate on non-existent namespaces
if !exists || forceLiveLookup {
// in case of latency in our caches, make a call direct to storage to verify that it truly exists or not
// as a last resort, make a call directly to storage
namespaceObj, err = l.client.Core().Namespaces().Get(a.GetNamespace())
if err != nil {
if errors.IsNotFound(err) {
@ -131,6 +151,7 @@ func (l *lifecycle) Admit(a admission.Attributes) error {
}
return errors.NewInternalError(err)
}
glog.V(4).Infof("found %s via storage lookup", a.GetNamespace())
}
// ensure that we're not trying to create objects in terminating namespaces