Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-03 01:06:27 +00:00
Merge pull request #49142 from joelsmith/slowstart
Automatic merge from submit-queue (batch tested with PRs 50602, 51561, 51703, 51748, 49142).

Slow-start batch pod creation of rs, rc, ds, jobs

Prevent a too-large replica count from generating enormous numbers of events by creating only a few pods at a time, then increasing the batch size as pod creations succeed. Stop creating batches of pods as soon as any pod-creation error is encountered.

Todo:
- [x] Add automated tests
- [x] Test ds

Fixes https://github.com/kubernetes/kubernetes/issues/49145

**Release note**:
```release-note
controllers backoff better in face of quota denial
```
This commit is contained in: commit 28857a2f02
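The four controller changes below all inline the same pattern. As a quick orientation, here is a minimal, self-contained sketch of that slow-start loop; it is sequential for brevity (the controllers run each batch concurrently under a sync.WaitGroup), and the function and variable names are invented for illustration rather than taken from the diff:

```go
package main

import "fmt"

// slowStartBatch calls fn up to total times, in batches that start at
// initialBatchSize and double after every fully successful batch. It stops
// scheduling new batches as soon as any call in the current batch fails and
// returns how many calls succeeded plus the first error seen.
func slowStartBatch(total, initialBatchSize int, fn func(i int) error) (int, error) {
	successes := 0
	pos, batch := 0, initialBatchSize
	for pos < total {
		if remaining := total - pos; batch > remaining {
			batch = remaining // clamp the last batch to what is still missing
		}
		var firstErr error
		for i := pos; i < pos+batch; i++ {
			if err := fn(i); err != nil {
				if firstErr == nil {
					firstErr = err
				}
				continue
			}
			successes++
		}
		if firstErr != nil {
			// Skip the remaining creations; a later resync retries them.
			return successes, firstErr
		}
		pos += batch
		batch *= 2
	}
	return successes, nil
}

func main() {
	quota := 10 // pretend the namespace quota admits only 10 pods
	created := 0
	n, err := slowStartBatch(100, 1, func(i int) error {
		if created >= quota {
			return fmt.Errorf("quota exceeded")
		}
		created++
		return nil
	})
	fmt.Println(n, err)
}
```

With a quota of 10 and 100 requested pods, the sketch attempts only 1+2+4+8 = 15 creates before giving up, which is the behavior the release note summarizes as "controllers backoff better in face of quota denial".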
@@ -65,6 +65,21 @@ const (
 	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
 	// latency/pod at the scale of 3000 pods over 100 nodes.
 	ExpectationsTimeout = 5 * time.Minute
+	// When batching pod creates, SlowStartInitialBatchSize is the size of the
+	// inital batch. The size of each successive batch is twice the size of
+	// the previous batch. For example, for a value of 1, batch sizes would be
+	// 1, 2, 4, 8, ... and for a value of 10, batch sizes would be
+	// 10, 20, 40, 80, ... Setting the value higher means that quota denials
+	// will result in more doomed API calls and associated event spam. Setting
+	// the value lower will result in more API call round trip periods for
+	// large batches.
+	//
+	// Given a number of pods to start "N":
+	// The number of doomed calls per sync once quota is exceeded is given by:
+	//      min(N,SlowStartInitialBatchSize)
+	// The number of batches is given by:
+	//      1+floor(log_2(ceil(N/SlowStartInitialBatchSize)))
+	SlowStartInitialBatchSize = 1
 )

 var UpdateTaintBackoff = wait.Backoff{
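To make the two formulas in that comment concrete, here is a small stand-alone calculation, assuming the value 1 chosen above for SlowStartInitialBatchSize:

```go
package main

import (
	"fmt"
	"math"
)

// Worked example for the formulas in the comment above.
func main() {
	const batch = 1 // SlowStartInitialBatchSize
	for _, n := range []int{1, 10, 100, 3000} {
		doomed := int(math.Min(float64(n), float64(batch)))
		batches := 1 + int(math.Floor(math.Log2(math.Ceil(float64(n)/float64(batch)))))
		fmt.Printf("N=%-5d doomed calls per sync=%d batches=%d\n", n, doomed, batches)
	}
	// For N=100 and an initial batch of 1, the batch sizes are
	// 1, 2, 4, 8, 16, 32, 37: seven batches, matching 1+floor(log2(100)).
}
```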
@@ -606,11 +621,13 @@ func (r RealPodControl) DeletePod(namespace string, podID string, object runtime

 type FakePodControl struct {
 	sync.Mutex
 	Templates       []v1.PodTemplateSpec
 	ControllerRefs  []metav1.OwnerReference
 	DeletePodName   []string
 	Patches         [][]byte
 	Err             error
+	CreateLimit     int
+	CreateCallCount int
 }

 var _ PodControlInterface = &FakePodControl{}
@@ -628,6 +645,10 @@ func (f *FakePodControl) PatchPod(namespace, name string, data []byte) error {
 func (f *FakePodControl) CreatePods(namespace string, spec *v1.PodTemplateSpec, object runtime.Object) error {
 	f.Lock()
 	defer f.Unlock()
+	f.CreateCallCount++
+	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
+		return fmt.Errorf("Not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
+	}
 	f.Templates = append(f.Templates, *spec)
 	if f.Err != nil {
 		return f.Err
@@ -638,6 +659,10 @@ func (f *FakePodControl) CreatePods(namespace string, spec *v1.PodTemplateSpec,
 func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
 	f.Lock()
 	defer f.Unlock()
+	f.CreateCallCount++
+	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
+		return fmt.Errorf("Not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
+	}
 	f.Templates = append(f.Templates, *spec)
 	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
 	if f.Err != nil {
@@ -649,6 +674,10 @@ func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *v1.
 func (f *FakePodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
 	f.Lock()
 	defer f.Unlock()
+	f.CreateCallCount++
+	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
+		return fmt.Errorf("Not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
+	}
 	f.Templates = append(f.Templates, *template)
 	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
 	if f.Err != nil {
@@ -674,6 +703,8 @@ func (f *FakePodControl) Clear() {
 	f.Templates = []v1.PodTemplateSpec{}
 	f.ControllerRefs = []metav1.OwnerReference{}
 	f.Patches = [][]byte{}
+	f.CreateLimit = 0
+	f.CreateCallCount = 0
 }

 // ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
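The new fields turn FakePodControl into a counting fake: every create call increments CreateCallCount, and once CreateLimit is exceeded the fake starts returning errors, which is what drives the slow-start behavior in the tests further down. A stand-alone imitation of just that bookkeeping (this is not the repository's fake, which also records templates and controller refs):

```go
package main

import "fmt"

// fakeCreator imitates the limit bookkeeping FakePodControl gains in this
// commit: calls are always counted, and calls past CreateLimit fail
// (CreateLimit == 0 means "no limit").
type fakeCreator struct {
	CreateLimit     int
	CreateCallCount int
}

func (f *fakeCreator) CreatePod() error {
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	return nil
}

func main() {
	f := &fakeCreator{CreateLimit: 10}
	var firstErr error
	for i := 0; i < 15; i++ {
		if err := f.CreatePod(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	fmt.Println(f.CreateCallCount, firstErr) // 15 calls recorded, errors from the 11th on
}
```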
@@ -53,6 +53,7 @@ go_library(
         "//vendor/k8s.io/client-go/listers/extensions/v1beta1:go_default_library",
         "//vendor/k8s.io/client-go/tools/cache:go_default_library",
         "//vendor/k8s.io/client-go/tools/record:go_default_library",
+        "//vendor/k8s.io/client-go/util/integer:go_default_library",
         "//vendor/k8s.io/client-go/util/workqueue:go_default_library",
     ],
 )
@@ -46,6 +46,7 @@ import (
 	extensionslisters "k8s.io/client-go/listers/extensions/v1beta1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/integer"
 	"k8s.io/client-go/util/workqueue"
 	v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
@@ -904,31 +905,54 @@ func (dsc *DaemonSetsController) syncNodes(ds *extensions.DaemonSet, podsToDelet

 	glog.V(4).Infof("Nodes needing daemon pods for daemon set %s: %+v, creating %d", ds.Name, nodesNeedingDaemonPods, createDiff)
 	createWait := sync.WaitGroup{}
-	createWait.Add(createDiff)
 	template := util.CreatePodTemplate(ds.Spec.Template, ds.Spec.TemplateGeneration, hash)
-	for i := 0; i < createDiff; i++ {
-		go func(ix int) {
-			defer createWait.Done()
-			err := dsc.podControl.CreatePodsOnNode(nodesNeedingDaemonPods[ix], ds.Namespace, &template, ds, metav1.NewControllerRef(ds, controllerKind))
-			if err != nil && errors.IsTimeout(err) {
-				// Pod is created but its initialization has timed out.
-				// If the initialization is successful eventually, the
-				// controller will observe the creation via the informer.
-				// If the initialization fails, or if the pod keeps
-				// uninitialized for a long time, the informer will not
-				// receive any update, and the controller will create a new
-				// pod when the expectation expires.
-				return
-			}
-			if err != nil {
-				glog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
-				dsc.expectations.CreationObserved(dsKey)
-				errCh <- err
-				utilruntime.HandleError(err)
-			}
-		}(i)
+	// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
+	// and double with each successful iteration in a kind of "slow start".
+	// This handles attempts to start large numbers of pods that would
+	// likely all fail with the same error. For example a project with a
+	// low quota that attempts to create a large number of pods will be
+	// prevented from spamming the API service with the pod create requests
+	// after one of its pods fails. Conveniently, this also prevents the
+	// event spam that those failures would generate.
+	batchSize := integer.IntMin(createDiff, controller.SlowStartInitialBatchSize)
+	for pos := 0; createDiff > pos; batchSize, pos = integer.IntMin(2*batchSize, createDiff-(pos+batchSize)), pos+batchSize {
+		errorCount := len(errCh)
+		createWait.Add(batchSize)
+		for i := pos; i < pos+batchSize; i++ {
+			go func(ix int) {
+				defer createWait.Done()
+				err := dsc.podControl.CreatePodsOnNode(nodesNeedingDaemonPods[ix], ds.Namespace, &template, ds, metav1.NewControllerRef(ds, controllerKind))
+				if err != nil && errors.IsTimeout(err) {
+					// Pod is created but its initialization has timed out.
+					// If the initialization is successful eventually, the
+					// controller will observe the creation via the informer.
+					// If the initialization fails, or if the pod keeps
+					// uninitialized for a long time, the informer will not
+					// receive any update, and the controller will create a new
+					// pod when the expectation expires.
+					return
+				}
+				if err != nil {
+					glog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
+					dsc.expectations.CreationObserved(dsKey)
+					errCh <- err
+					utilruntime.HandleError(err)
+				}
+			}(i)
+		}
+		createWait.Wait()
+		// any skipped pods that we never attempted to start shouldn't be expected.
+		skippedPods := createDiff - batchSize
+		if errorCount < len(errCh) && skippedPods > 0 {
+			glog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for set %q/%q", skippedPods, ds.Namespace, ds.Name)
+			for i := 0; i < skippedPods; i++ {
+				dsc.expectations.CreationObserved(dsKey)
+			}
+			// The skipped pods will be retried later. The next controller resync will
+			// retry the slow start process.
+			break
+		}
 	}
-	createWait.Wait()

 	glog.V(4).Infof("Pods to delete for daemon set %s: %+v, deleting %d", ds.Name, podsToDelete, deleteDiff)
 	deleteWait := sync.WaitGroup{}
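The compound update in the DaemonSet loop advances pos by the batch just attempted while simultaneously doubling batchSize and clamping it to the pods still missing. A stand-alone trace of only that arithmetic, assuming SlowStartInitialBatchSize is 1, no create errors, and an invented intMin helper standing in for integer.IntMin:

```go
package main

import "fmt"

func intMin(a, b int) int { // stands in for k8s.io/client-go/util/integer.IntMin
	if a < b {
		return a
	}
	return b
}

// Reproduces only the batch arithmetic of the loop above.
func main() {
	createDiff := 25
	batchSize := intMin(createDiff, 1)
	for pos := 0; createDiff > pos; batchSize, pos = intMin(2*batchSize, createDiff-(pos+batchSize)), pos+batchSize {
		fmt.Printf("creating pods [%d, %d)\n", pos, pos+batchSize)
	}
	// Output: batches of 1, 2, 4, 8, and a final clamped batch of 10.
}
```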
@@ -412,6 +412,28 @@ func TestSimpleDaemonSetLaunchesPods(t *testing.T) {
 	}
 }

+// Simulate a cluster with 100 nodes, but simulate a limit (like a quota limit)
+// of 10 pods, and verify that the ds doesn't make 100 create calls per sync pass
+func TestSimpleDaemonSetPodCreateErrors(t *testing.T) {
+	for _, strategy := range updateStrategies() {
+		ds := newDaemonSet("foo")
+		ds.Spec.UpdateStrategy = *strategy
+		manager, podControl, _ := newTestController(ds)
+		podControl.FakePodControl.CreateLimit = 10
+		addNodes(manager.nodeStore, 0, podControl.FakePodControl.CreateLimit*10, nil)
+		manager.dsStore.Add(ds)
+		syncAndValidateDaemonSets(t, manager, ds, podControl, podControl.FakePodControl.CreateLimit, 0, 0)
+		expectedLimit := 0
+		for pass := uint8(0); expectedLimit <= podControl.FakePodControl.CreateLimit; pass++ {
+			expectedLimit += controller.SlowStartInitialBatchSize << pass
+		}
+		if podControl.FakePodControl.CreateCallCount > expectedLimit {
+			t.Errorf("Unexpected number of create calls. Expected <= %d, saw %d\n", podControl.FakePodControl.CreateLimit*2, podControl.FakePodControl.CreateCallCount)
+		}
+
+	}
+}
+
 func TestSimpleDaemonSetUpdatesStatusAfterLaunchingPods(t *testing.T) {
 	for _, strategy := range updateStrategies() {
 		ds := newDaemonSet("foo")
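The expectedLimit loop in this test (and in the analogous job, rs, and rc tests below) derives an upper bound on create calls rather than an exact count: it keeps adding doubling batch sizes until the running total exceeds the simulated limit. A stand-alone version of the same calculation:

```go
package main

import "fmt"

// With CreateLimit = 10 and SlowStartInitialBatchSize = 1 the bound is
// 1+2+4+8 = 15, so at most 15 of the 100 possible create calls (one per
// node) may be observed in a single sync pass.
func main() {
	createLimit := 10
	expectedLimit := 0
	for pass := uint8(0); expectedLimit <= createLimit; pass++ {
		expectedLimit += 1 << pass // SlowStartInitialBatchSize << pass
	}
	fmt.Println(expectedLimit) // 15
}
```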
@@ -33,6 +33,7 @@ go_library(
         "//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
         "//vendor/k8s.io/client-go/tools/cache:go_default_library",
         "//vendor/k8s.io/client-go/tools/record:go_default_library",
+        "//vendor/k8s.io/client-go/util/integer:go_default_library",
         "//vendor/k8s.io/client-go/util/workqueue:go_default_library",
     ],
 )
@@ -39,6 +39,7 @@ import (
 	corelisters "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/integer"
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/util/metrics"
@@ -652,34 +653,60 @@ func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *b

 		active += diff
 		wait := sync.WaitGroup{}
-		wait.Add(int(diff))
-		for i := int32(0); i < diff; i++ {
-			go func() {
-				defer wait.Done()
-				err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
-				if err != nil && errors.IsTimeout(err) {
-					// Pod is created but its initialization has timed out.
-					// If the initialization is successful eventually, the
-					// controller will observe the creation via the informer.
-					// If the initialization fails, or if the pod keeps
-					// uninitialized for a long time, the informer will not
-					// receive any update, and the controller will create a new
-					// pod when the expectation expires.
-					return
-				}
-				if err != nil {
-					defer utilruntime.HandleError(err)
-					// Decrement the expected number of creates because the informer won't observe this pod
-					glog.V(2).Infof("Failed creation, decrementing expectations for job %q/%q", job.Namespace, job.Name)
-					jm.expectations.CreationObserved(jobKey)
-					activeLock.Lock()
-					active--
-					activeLock.Unlock()
-					errCh <- err
-				}
-			}()
-		}
-		wait.Wait()
+
+		// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
+		// and double with each successful iteration in a kind of "slow start".
+		// This handles attempts to start large numbers of pods that would
+		// likely all fail with the same error. For example a project with a
+		// low quota that attempts to create a large number of pods will be
+		// prevented from spamming the API service with the pod create requests
+		// after one of its pods fails. Conveniently, this also prevents the
+		// event spam that those failures would generate.
+		for batchSize := int32(integer.IntMin(int(diff), controller.SlowStartInitialBatchSize)); diff > 0; batchSize = integer.Int32Min(2*batchSize, diff) {
+			errorCount := len(errCh)
+			wait.Add(int(batchSize))
+			for i := int32(0); i < batchSize; i++ {
+				go func() {
+					defer wait.Done()
+					err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
+					if err != nil && errors.IsTimeout(err) {
+						// Pod is created but its initialization has timed out.
+						// If the initialization is successful eventually, the
+						// controller will observe the creation via the informer.
+						// If the initialization fails, or if the pod keeps
+						// uninitialized for a long time, the informer will not
+						// receive any update, and the controller will create a new
+						// pod when the expectation expires.
+						return
+					}
+					if err != nil {
+						defer utilruntime.HandleError(err)
+						// Decrement the expected number of creates because the informer won't observe this pod
+						glog.V(2).Infof("Failed creation, decrementing expectations for job %q/%q", job.Namespace, job.Name)
+						jm.expectations.CreationObserved(jobKey)
+						activeLock.Lock()
+						active--
+						activeLock.Unlock()
+						errCh <- err
+					}
+				}()
+			}
+			wait.Wait()
+			// any skipped pods that we never attempted to start shouldn't be expected.
+			skippedPods := diff - batchSize
+			if errorCount < len(errCh) && skippedPods > 0 {
+				glog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for job %q/%q", skippedPods, job.Namespace, job.Name)
+				active -= skippedPods
+				for i := int32(0); i < skippedPods; i++ {
+					// Decrement the expected number of creates because the informer won't observe this pod
+					jm.expectations.CreationObserved(jobKey)
+				}
+				// The skipped pods will be retried later. The next controller resync will
+				// retry the slow start process.
+				break
+			}
+			diff -= batchSize
+		}
 	}

 	select {
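Besides the int32 helpers and the extra active/skippedPods bookkeeping, the Job variant relies on the same error-detection idiom as the other controllers: len(errCh) is snapshotted before a batch and compared after the WaitGroup returns. A stand-alone sketch of that idiom with invented data (not controller code):

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// The controller snapshots len(errCh) before launching a batch and compares
// it afterwards; any growth means at least one create in that batch failed,
// so the remaining pods are skipped until the next resync. The buffered
// channel must be large enough to hold one error per attempted create.
func main() {
	diff := 8
	errCh := make(chan error, diff)
	failFrom := 5 // pretend creates start failing at the 6th pod

	errorCount := len(errCh) // snapshot before the batch
	var wg sync.WaitGroup
	for i := 0; i < diff; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if i >= failFrom {
				errCh <- errors.New("quota exceeded")
			}
		}(i)
	}
	wg.Wait()

	if errorCount < len(errCh) {
		fmt.Printf("batch saw %d new errors; skipping the rest\n", len(errCh)-errorCount)
	}
}
```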
@@ -124,6 +124,7 @@ func TestControllerSyncJob(t *testing.T) {
 		parallelism int32
 		completions int32
 		deleting    bool
+		podLimit    int

 		// pod setup
 		podControllerError error
@@ -141,102 +142,107 @@ func TestControllerSyncJob(t *testing.T) {
 		expectedComplete  bool
 	}{
 		"job start": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 0, 0, 0,
 			2, 0, 2, 0, 0, false,
 		},
 		"WQ job start": {
-			2, -1, false,
+			2, -1, false, 0,
 			nil, 0, 0, 0, 0,
 			2, 0, 2, 0, 0, false,
 		},
 		"pending pods": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 2, 0, 0, 0,
 			0, 0, 2, 0, 0, false,
 		},
 		"correct # of pods": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 2, 0, 0,
 			0, 0, 2, 0, 0, false,
 		},
 		"WQ job: correct # of pods": {
-			2, -1, false,
+			2, -1, false, 0,
 			nil, 0, 2, 0, 0,
 			0, 0, 2, 0, 0, false,
 		},
 		"too few active pods": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 1, 1, 0,
 			1, 0, 2, 1, 0, false,
 		},
 		"too few active pods with a dynamic job": {
-			2, -1, false,
+			2, -1, false, 0,
 			nil, 0, 1, 0, 0,
 			1, 0, 2, 0, 0, false,
 		},
 		"too few active pods, with controller error": {
-			2, 5, false,
+			2, 5, false, 0,
 			fmt.Errorf("Fake error"), 0, 1, 1, 0,
 			1, 0, 1, 1, 0, false,
 		},
 		"too many active pods": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 3, 0, 0,
 			0, 1, 2, 0, 0, false,
 		},
 		"too many active pods, with controller error": {
-			2, 5, false,
+			2, 5, false, 0,
 			fmt.Errorf("Fake error"), 0, 3, 0, 0,
 			0, 1, 3, 0, 0, false,
 		},
 		"failed pod": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 1, 1, 1,
 			1, 0, 2, 1, 1, false,
 		},
 		"job finish": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 0, 5, 0,
 			0, 0, 0, 5, 0, true,
 		},
 		"WQ job finishing": {
-			2, -1, false,
+			2, -1, false, 0,
 			nil, 0, 1, 1, 0,
 			0, 0, 1, 1, 0, false,
 		},
 		"WQ job all finished": {
-			2, -1, false,
+			2, -1, false, 0,
 			nil, 0, 0, 2, 0,
 			0, 0, 0, 2, 0, true,
 		},
 		"WQ job all finished despite one failure": {
-			2, -1, false,
+			2, -1, false, 0,
 			nil, 0, 0, 1, 1,
 			0, 0, 0, 1, 1, true,
 		},
 		"more active pods than completions": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 10, 0, 0,
 			0, 8, 2, 0, 0, false,
 		},
 		"status change": {
-			2, 5, false,
+			2, 5, false, 0,
 			nil, 0, 2, 2, 0,
 			0, 0, 2, 2, 0, false,
 		},
 		"deleting job": {
-			2, 5, true,
+			2, 5, true, 0,
 			nil, 1, 1, 1, 0,
 			0, 0, 2, 1, 0, false,
 		},
+		"limited pods": {
+			100, 200, false, 10,
+			nil, 0, 0, 0, 0,
+			10, 0, 10, 0, 0, false,
+		},
 	}

 	for name, tc := range testCases {
 		// job manager setup
 		clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
 		manager, sharedInformerFactory := newJobControllerFromClient(clientset, controller.NoResyncPeriodFunc)
-		fakePodControl := controller.FakePodControl{Err: tc.podControllerError}
+		fakePodControl := controller.FakePodControl{Err: tc.podControllerError, CreateLimit: tc.podLimit}
 		manager.podControl = &fakePodControl
 		manager.podStoreSynced = alwaysReady
 		manager.jobStoreSynced = alwaysReady
@@ -276,7 +282,7 @@ func TestControllerSyncJob(t *testing.T) {
 				t.Errorf("%s: Syncing jobs would return error when podController exception", name)
 			}
 		} else {
-			if err != nil {
+			if err != nil && (tc.podLimit == 0 || fakePodControl.CreateCallCount < tc.podLimit) {
 				t.Errorf("%s: unexpected error when syncing jobs %v", name, err)
 			}
 		}
@@ -327,6 +333,14 @@ func TestControllerSyncJob(t *testing.T) {
 		if tc.expectedComplete && !getCondition(actual, batch.JobComplete) {
 			t.Errorf("%s: expected completion condition. Got %#v", name, actual.Status.Conditions)
 		}
+		// validate slow start
+		expectedLimit := 0
+		for pass := uint8(0); expectedLimit <= tc.podLimit; pass++ {
+			expectedLimit += controller.SlowStartInitialBatchSize << pass
+		}
+		if tc.podLimit > 0 && fakePodControl.CreateCallCount > expectedLimit {
+			t.Errorf("%s: Unexpected number of create calls. Expected <= %d, saw %d\n", name, fakePodControl.CreateLimit*2, fakePodControl.CreateCallCount)
+		}
 	}
 }

@@ -35,6 +35,7 @@ go_library(
         "//vendor/k8s.io/client-go/listers/extensions/v1beta1:go_default_library",
         "//vendor/k8s.io/client-go/tools/cache:go_default_library",
         "//vendor/k8s.io/client-go/tools/record:go_default_library",
+        "//vendor/k8s.io/client-go/util/integer:go_default_library",
         "//vendor/k8s.io/client-go/util/workqueue:go_default_library",
     ],
 )
@@ -43,6 +43,7 @@ import (
 	extensionslisters "k8s.io/client-go/listers/extensions/v1beta1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/integer"
 	"k8s.io/client-go/util/workqueue"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller"
@@ -450,41 +451,65 @@ func (rsc *ReplicaSetController) manageReplicas(filteredPods []*v1.Pod, rs *exte
 		// beforehand and store it via ExpectCreations.
 		rsc.expectations.ExpectCreations(rsKey, diff)
 		var wg sync.WaitGroup
-		wg.Add(diff)
 		glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", rs.Namespace, rs.Name, *(rs.Spec.Replicas), diff)
-		for i := 0; i < diff; i++ {
-			go func() {
-				defer wg.Done()
-				var err error
-				boolPtr := func(b bool) *bool { return &b }
-				controllerRef := &metav1.OwnerReference{
-					APIVersion:         controllerKind.GroupVersion().String(),
-					Kind:               controllerKind.Kind,
-					Name:               rs.Name,
-					UID:                rs.UID,
-					BlockOwnerDeletion: boolPtr(true),
-					Controller:         boolPtr(true),
-				}
-				err = rsc.podControl.CreatePodsWithControllerRef(rs.Namespace, &rs.Spec.Template, rs, controllerRef)
-				if err != nil && errors.IsTimeout(err) {
-					// Pod is created but its initialization has timed out.
-					// If the initialization is successful eventually, the
-					// controller will observe the creation via the informer.
-					// If the initialization fails, or if the pod keeps
-					// uninitialized for a long time, the informer will not
-					// receive any update, and the controller will create a new
-					// pod when the expectation expires.
-					return
-				}
-				if err != nil {
-					// Decrement the expected number of creates because the informer won't observe this pod
-					glog.V(2).Infof("Failed creation, decrementing expectations for replica set %q/%q", rs.Namespace, rs.Name)
-					rsc.expectations.CreationObserved(rsKey)
-					errCh <- err
-				}
-			}()
-		}
-		wg.Wait()
+		// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
+		// and double with each successful iteration in a kind of "slow start".
+		// This handles attempts to start large numbers of pods that would
+		// likely all fail with the same error. For example a project with a
+		// low quota that attempts to create a large number of pods will be
+		// prevented from spamming the API service with the pod create requests
+		// after one of its pods fails. Conveniently, this also prevents the
+		// event spam that those failures would generate.
+		for batchSize := integer.IntMin(diff, controller.SlowStartInitialBatchSize); diff > 0; batchSize = integer.IntMin(2*batchSize, diff) {
+			errorCount := len(errCh)
+			wg.Add(batchSize)
+			for i := 0; i < batchSize; i++ {
+				go func() {
+					defer wg.Done()
+					var err error
+					boolPtr := func(b bool) *bool { return &b }
+					controllerRef := &metav1.OwnerReference{
+						APIVersion:         controllerKind.GroupVersion().String(),
+						Kind:               controllerKind.Kind,
+						Name:               rs.Name,
+						UID:                rs.UID,
+						BlockOwnerDeletion: boolPtr(true),
+						Controller:         boolPtr(true),
+					}
+					err = rsc.podControl.CreatePodsWithControllerRef(rs.Namespace, &rs.Spec.Template, rs, controllerRef)
+					if err != nil && errors.IsTimeout(err) {
+						// Pod is created but its initialization has timed out.
+						// If the initialization is successful eventually, the
+						// controller will observe the creation via the informer.
+						// If the initialization fails, or if the pod keeps
+						// uninitialized for a long time, the informer will not
+						// receive any update, and the controller will create a new
+						// pod when the expectation expires.
+						return
+					}
+					if err != nil {
+						// Decrement the expected number of creates because the informer won't observe this pod
+						glog.V(2).Infof("Failed creation, decrementing expectations for replica set %q/%q", rs.Namespace, rs.Name)
+						rsc.expectations.CreationObserved(rsKey)
+						errCh <- err
+					}
+				}()
+			}
+			wg.Wait()
+			// any skipped pods that we never attempted to start shouldn't be expected.
+			skippedPods := diff - batchSize
+			if errorCount < len(errCh) && skippedPods > 0 {
+				glog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for replica set %q/%q", skippedPods, rs.Namespace, rs.Name)
+				for i := 0; i < skippedPods; i++ {
+					// Decrement the expected number of creates because the informer won't observe this pod
+					rsc.expectations.CreationObserved(rsKey)
+				}
+				// The skipped pods will be retried later. The next controller resync will
+				// retry the slow start process.
+				break
+			}
+			diff -= batchSize
+		}
 	} else if diff > 0 {
 		if diff > rsc.burstReplicas {
 			diff = rsc.burstReplicas
@@ -324,6 +324,33 @@ func TestSyncReplicaSetCreates(t *testing.T) {
 	validateSyncReplicaSet(t, &fakePodControl, 2, 0, 0)
 }

+// Tell the rs to create 100 replicas, but simulate a limit (like a quota limit)
+// of 10, and verify that the rs doesn't make 100 create calls per sync pass
+func TestSyncReplicaSetCreateFailures(t *testing.T) {
+	fakePodControl := controller.FakePodControl{}
+	fakePodControl.CreateLimit = 10
+
+	labelMap := map[string]string{"foo": "bar"}
+	rs := newReplicaSet(fakePodControl.CreateLimit*10, labelMap)
+	client := fake.NewSimpleClientset(rs)
+	stopCh := make(chan struct{})
+	defer close(stopCh)
+	manager, informers := testNewReplicaSetControllerFromClient(client, stopCh, BurstReplicas)
+
+	informers.Extensions().V1beta1().ReplicaSets().Informer().GetIndexer().Add(rs)
+
+	manager.podControl = &fakePodControl
+	manager.syncReplicaSet(getKey(rs, t))
+	validateSyncReplicaSet(t, &fakePodControl, fakePodControl.CreateLimit, 0, 0)
+	expectedLimit := 0
+	for pass := uint8(0); expectedLimit <= fakePodControl.CreateLimit; pass++ {
+		expectedLimit += controller.SlowStartInitialBatchSize << pass
+	}
+	if fakePodControl.CreateCallCount > expectedLimit {
+		t.Errorf("Unexpected number of create calls. Expected <= %d, saw %d\n", fakePodControl.CreateLimit*2, fakePodControl.CreateCallCount)
+	}
+}
+
 func TestStatusUpdatesWithoutReplicasChange(t *testing.T) {
 	// Setup a fake server to listen for requests, and run the ReplicaSet controller in steady state
 	fakeHandler := utiltesting.FakeHandler{
@@ -32,6 +32,7 @@ go_library(
         "//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
         "//vendor/k8s.io/client-go/tools/cache:go_default_library",
         "//vendor/k8s.io/client-go/tools/record:go_default_library",
+        "//vendor/k8s.io/client-go/util/integer:go_default_library",
         "//vendor/k8s.io/client-go/util/workqueue:go_default_library",
     ],
 )
@@ -40,6 +40,7 @@ import (
 	corelisters "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/integer"
 	"k8s.io/client-go/util/workqueue"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller"
@@ -446,42 +447,66 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*v1.Pod, rc *v1.Repl
 	errCh := make(chan error, diff)
 	rm.expectations.ExpectCreations(rcKey, diff)
 	var wg sync.WaitGroup
-	wg.Add(diff)
 	glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", rc.Namespace, rc.Name, *(rc.Spec.Replicas), diff)
-	for i := 0; i < diff; i++ {
-		go func() {
-			defer wg.Done()
-			var err error
-			boolPtr := func(b bool) *bool { return &b }
-			controllerRef := &metav1.OwnerReference{
-				APIVersion:         controllerKind.GroupVersion().String(),
-				Kind:               controllerKind.Kind,
-				Name:               rc.Name,
-				UID:                rc.UID,
-				BlockOwnerDeletion: boolPtr(true),
-				Controller:         boolPtr(true),
-			}
-			err = rm.podControl.CreatePodsWithControllerRef(rc.Namespace, rc.Spec.Template, rc, controllerRef)
-			if err != nil && errors.IsTimeout(err) {
-				// Pod is created but its initialization has timed out.
-				// If the initialization is successful eventually, the
-				// controller will observe the creation via the informer.
-				// If the initialization fails, or if the pod keeps
-				// uninitialized for a long time, the informer will not
-				// receive any update, and the controller will create a new
-				// pod when the expectation expires.
-				return
-			}
-			if err != nil {
-				// Decrement the expected number of creates because the informer won't observe this pod
-				glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name)
-				rm.expectations.CreationObserved(rcKey)
-				errCh <- err
-				utilruntime.HandleError(err)
-			}
-		}()
-	}
-	wg.Wait()
+	// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
+	// and double with each successful iteration in a kind of "slow start".
+	// This handles attempts to start large numbers of pods that would
+	// likely all fail with the same error. For example a project with a
+	// low quota that attempts to create a large number of pods will be
+	// prevented from spamming the API service with the pod create requests
+	// after one of its pods fails. Conveniently, this also prevents the
+	// event spam that those failures would generate.
+	for batchSize := integer.IntMin(diff, controller.SlowStartInitialBatchSize); diff > 0; batchSize = integer.IntMin(2*batchSize, diff) {
+		errorCount := len(errCh)
+		wg.Add(batchSize)
+		for i := 0; i < batchSize; i++ {
+			go func() {
+				defer wg.Done()
+				var err error
+				boolPtr := func(b bool) *bool { return &b }
+				controllerRef := &metav1.OwnerReference{
+					APIVersion:         controllerKind.GroupVersion().String(),
+					Kind:               controllerKind.Kind,
+					Name:               rc.Name,
+					UID:                rc.UID,
+					BlockOwnerDeletion: boolPtr(true),
+					Controller:         boolPtr(true),
+				}
+				err = rm.podControl.CreatePodsWithControllerRef(rc.Namespace, rc.Spec.Template, rc, controllerRef)
+				if err != nil && errors.IsTimeout(err) {
+					// Pod is created but its initialization has timed out.
+					// If the initialization is successful eventually, the
+					// controller will observe the creation via the informer.
+					// If the initialization fails, or if the pod keeps
+					// uninitialized for a long time, the informer will not
+					// receive any update, and the controller will create a new
+					// pod when the expectation expires.
+					return
+				}
+				if err != nil {
+					// Decrement the expected number of creates because the informer won't observe this pod
+					glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name)
+					rm.expectations.CreationObserved(rcKey)
+					errCh <- err
+					utilruntime.HandleError(err)
+				}
+			}()
+		}
+		wg.Wait()
+		// any skipped pods that we never attempted to start shouldn't be expected.
+		skippedPods := diff - batchSize
+		if errorCount < len(errCh) && skippedPods > 0 {
+			glog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for controller %q/%q", skippedPods, rc.Namespace, rc.Name)
+			for i := 0; i < skippedPods; i++ {
+				// Decrement the expected number of creates because the informer won't observe this pod
+				rm.expectations.CreationObserved(rcKey)
+			}
+			// The skipped pods will be retried later. The next controller resync will
+			// retry the slow start process.
+			break
+		}
+		diff -= batchSize
+	}

 	select {
 	case err := <-errCh:
@@ -284,6 +284,30 @@ func TestSyncReplicationControllerCreates(t *testing.T) {
 	validateSyncReplication(t, &fakePodControl, 2, 0, 0)
 }

+// Tell the controller to create 100 replicas, but simulate a limit (like a quota limit)
+// of 10, and verify that the controller doesn't make 100 create calls per sync pass
+func TestSyncReplicationControllerCreateFailures(t *testing.T) {
+	fakePodControl := controller.FakePodControl{}
+	fakePodControl.CreateLimit = 10
+
+	rc := newReplicationController(fakePodControl.CreateLimit * 10)
+	c := fake.NewSimpleClientset(rc)
+	manager, _ /*podInformer*/, rcInformer := newReplicationManagerFromClient(c, BurstReplicas)
+
+	rcInformer.Informer().GetIndexer().Add(rc)
+
+	manager.podControl = &fakePodControl
+	manager.syncReplicationController(getKey(rc, t))
+	validateSyncReplication(t, &fakePodControl, fakePodControl.CreateLimit, 0, 0)
+	expectedLimit := 0
+	for pass := uint8(0); expectedLimit <= fakePodControl.CreateLimit; pass++ {
+		expectedLimit += controller.SlowStartInitialBatchSize << pass
+	}
+	if fakePodControl.CreateCallCount > expectedLimit {
+		t.Errorf("Unexpected number of create calls. Expected <= %d, saw %d\n", fakePodControl.CreateLimit*2, fakePodControl.CreateCallCount)
+	}
+}
+
 func TestStatusUpdatesWithoutReplicasChange(t *testing.T) {
 	// Setup a fake server to listen for requests, and run the rc manager in steady state
 	fakeHandler := utiltesting.FakeHandler{