19848: Retry service IP repair on conflict

Components can write services during startup, which results in the IP
allocator map being updated. Since core controllers *must* succeed for
the masters to start, we should retry the repair a few times on conflict in order to pass.
This commit is contained in:
Clayton Coleman
2016-01-20 09:20:29 -05:00
parent 4803b8a984
commit e2eb1b0dc4
2 changed files with 23 additions and 4 deletions

View File

@@ -22,6 +22,8 @@ import (
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/registry/service"
"k8s.io/kubernetes/pkg/registry/service/ipallocator"
"k8s.io/kubernetes/pkg/util"
@@ -72,6 +74,11 @@ func (c *Repair) RunUntil(ch chan struct{}) {
// RunOnce performs one repair pass over the cluster IP allocations. The pass
// itself lives in runOnce; here it is handed to client.RetryOnConflict together
// with client.DefaultBackoff, and whatever error that helper yields is returned
// to the caller unchanged.
func (c *Repair) RunOnce() error {
	repairPass := c.runOnce
	return client.RetryOnConflict(client.DefaultBackoff, repairPass)
}
// runOnce verifies the state of the cluster IP allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) runOnce() error {
// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
// or if they are executed against different leaders,
// the ordering guarantee required to ensure no IP is allocated twice is violated.
@@ -127,12 +134,14 @@ func (c *Repair) RunOnce() error {
}
}
err = r.Snapshot(latest)
if err != nil {
return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
if err := r.Snapshot(latest); err != nil {
return fmt.Errorf("unable to snapshot the updated service IP allocations: %v", err)
}
if err := c.alloc.CreateOrUpdate(latest); err != nil {
if errors.IsConflict(err) {
return err
}
return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
}
return nil

View File

@@ -21,6 +21,8 @@ import (
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/registry/service"
"k8s.io/kubernetes/pkg/registry/service/portallocator"
"k8s.io/kubernetes/pkg/util"
@@ -57,6 +59,11 @@ func (c *Repair) RunUntil(ch chan struct{}) {
// RunOnce performs one repair pass over the port allocations. The pass itself
// lives in runOnce; here it is handed to client.RetryOnConflict together with
// client.DefaultBackoff, and whatever error that helper yields is returned to
// the caller unchanged.
func (c *Repair) RunOnce() error {
	repairPass := c.runOnce
	return client.RetryOnConflict(client.DefaultBackoff, repairPass)
}
// runOnce verifies the state of the port allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) runOnce() error {
// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
// or if they are executed against different leaders,
// the ordering guarantee required to ensure no port is allocated twice is violated.
@@ -116,10 +123,13 @@ func (c *Repair) RunOnce() error {
err = r.Snapshot(latest)
if err != nil {
return fmt.Errorf("unable to persist the updated port allocations: %v", err)
return fmt.Errorf("unable to snapshot the updated port allocations: %v", err)
}
if err := c.alloc.CreateOrUpdate(latest); err != nil {
if errors.IsConflict(err) {
return err
}
return fmt.Errorf("unable to persist the updated port allocations: %v", err)
}
return nil