Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #39206 from thockin/allocator-repair-race
Automatic merge from submit-queue (batch tested with PRs 39250, 39206). WIP: workaround for IP and port allocator repair race. Fixes #37488. WIP: this does the IP allocator but not the port allocator yet; sending for review before I clone the deltas for ports. Idea: force the repair loop to detect a leak 3 times in a row before actually releasing the IP, which should give any distributed races time to resolve. It's a little hacky, but without mutual exclusion or proper transactions it works.
This commit is contained in: 9c2bd62159
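The holdoff idea is easiest to see in isolation. Below is a minimal standalone sketch (illustrative only, with hypothetical names leakTracker and stillHeld; the real change keeps the counter in the Repair struct's leaks map and re-allocates the address in the rebuilt allocator while the counter is still positive):

package main

import "fmt"

// numRepairsBeforeLeakCleanup mirrors the constant added by this commit:
// an address must look leaked on this many consecutive repair passes
// before it is actually released.
const numRepairsBeforeLeakCleanup = 3

// leakTracker is a hypothetical stand-in for the Repair.leaks map.
type leakTracker struct{ leaks map[string]int }

// stillHeld records one more sighting of an apparently leaked IP and reports
// whether the repair pass should keep treating it as allocated.
func (t *leakTracker) stillHeld(ip string) bool {
    count, seen := t.leaks[ip]
    if !seen {
        count = numRepairsBeforeLeakCleanup
    }
    count--
    if count <= 0 {
        delete(t.leaks, ip) // holdoff expired: the IP may now be reused
        return false
    }
    t.leaks[ip] = count // pretend it is still in use for another pass
    return true
}

func main() {
    t := &leakTracker{leaks: map[string]int{}}
    for pass := 1; pass <= 3; pass++ {
        // Prints true, true, false: the IP is only freed on the third pass.
        fmt.Printf("pass %d: still held = %v\n", pass, t.stillHeld("10.0.0.7"))
    }
}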
@@ -100,7 +100,7 @@ message ClusterSpec {
   optional k8s.io.kubernetes.pkg.api.v1.LocalObjectReference secretRef = 2;
 }
 
-// ClusterStatus is information about the current status of a cluster updated by cluster controller peridocally.
+// ClusterStatus is information about the current status of a cluster updated by cluster controller periodically.
 message ClusterStatus {
   // Conditions is an array of current cluster conditions.
   // +optional
@@ -65,6 +65,8 @@ kube::log::status "Starting kube-apiserver"
   --cert-dir="${TMP_DIR}/certs" \
   --runtime-config="api/all=true" \
   --token-auth-file=$TMP_DIR/tokenauth.csv \
+  --logtostderr \
+  --v=2 \
   --service-cluster-ip-range="10.0.0.0/24" >/tmp/openapi-api-server.log 2>&1 &
 APISERVER_PID=$!
 
@@ -26,5 +26,5 @@ go_test(
     ],
     library = "go_default_library",
     tags = ["automanaged"],
-    deps = [],
+    deps = ["//vendor:k8s.io/client-go/pkg/util/sets"],
 )
@@ -124,6 +124,33 @@ func (r *AllocationBitmap) Release(offset int) error {
     return nil
 }
 
+const (
+    // Find the size of a big.Word in bytes.
+    notZero   = uint64(^big.Word(0))
+    wordPower = (notZero>>8)&1 + (notZero>>16)&1 + (notZero>>32)&1
+    wordSize  = 1 << wordPower
+)
+
+// ForEach calls the provided function for each allocated bit. The
+// AllocationBitmap may not be modified while this loop is running.
+func (r *AllocationBitmap) ForEach(fn func(int)) {
+    r.lock.Lock()
+    defer r.lock.Unlock()
+
+    words := r.allocated.Bits()
+    for wordIdx, word := range words {
+        bit := 0
+        for word > 0 {
+            if (word & 1) != 0 {
+                fn((wordIdx * wordSize * 8) + bit)
+            }
+            bit++
+            word = word >> 1
+        }
+    }
+}
+
 // Has returns true if the provided item is already allocated and a call
 // to Allocate(offset) would fail.
 func (r *AllocationBitmap) Has(offset int) bool {
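A side note on the big.Word sizing constants added above (the program below is not part of the commit; it only illustrates, under no extra assumptions beyond the standard library, what the expressions evaluate to — the shift-and-mask terms compute log2 of the word's width in bytes):

package main

import (
    "fmt"
    "math/big"
)

func main() {
    // Same derivation as in the commit: ^big.Word(0) is all ones, and each
    // shift-and-mask term contributes 1 when the word is wide enough.
    notZero := uint64(^big.Word(0))
    wordPower := (notZero>>8)&1 + (notZero>>16)&1 + (notZero>>32)&1
    wordSize := uint64(1) << wordPower
    // On a 64-bit platform this prints: wordPower=3 wordSize=8 bytes (64 bits per word);
    // on a 32-bit platform: wordPower=2 wordSize=4 bytes (32 bits per word).
    fmt.Printf("wordPower=%d wordSize=%d bytes (%d bits per word)\n", wordPower, wordSize, wordSize*8)
}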
@@ -18,6 +18,8 @@ package allocator
 
 import (
     "testing"
+
+    "k8s.io/client-go/pkg/util/sets"
 )
 
 func TestAllocate(t *testing.T) {
@@ -82,6 +84,37 @@ func TestRelease(t *testing.T) {
     }
 }
 
+func TestForEach(t *testing.T) {
+    testCases := []sets.Int{
+        sets.NewInt(),
+        sets.NewInt(0),
+        sets.NewInt(0, 2, 5, 9),
+        sets.NewInt(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
+    }
+
+    for i, tc := range testCases {
+        m := NewAllocationMap(10, "test")
+        for offset := range tc {
+            if ok, _ := m.Allocate(offset); !ok {
+                t.Errorf("[%d] error allocate offset %v", i, offset)
+            }
+            if !m.Has(offset) {
+                t.Errorf("[%d] expect offset %v allocated", i, offset)
+            }
+        }
+        calls := sets.NewInt()
+        m.ForEach(func(i int) {
+            calls.Insert(i)
+        })
+        if len(calls) != len(tc) {
+            t.Errorf("[%d] expected %d calls, got %d", i, len(tc), len(calls))
+        }
+        if !calls.Equal(tc) {
+            t.Errorf("[%d] expected calls to equal testcase: %v vs %v", i, calls.List(), tc.List())
+        }
+    }
+}
+
 func TestSnapshotAndRestore(t *testing.T) {
     offset := 3
     m := NewAllocationMap(10, "test")
@@ -144,6 +144,12 @@ func (e *Etcd) Release(item int) error {
     })
 }
 
+func (e *Etcd) ForEach(fn func(int)) {
+    e.lock.Lock()
+    defer e.lock.Unlock()
+    e.alloc.ForEach(fn)
+}
+
 // tryUpdate performs a read-update to persist the latest snapshot state of allocation.
 func (e *Etcd) tryUpdate(fn func() error) error {
     err := e.storage.GuaranteedUpdate(context.TODO(), e.baseKey, &api.RangeAllocation{}, true, nil,
@@ -22,6 +22,7 @@ type Interface interface {
     Allocate(int) (bool, error)
     AllocateNext() (int, bool, error)
     Release(int) error
+    ForEach(func(int))
 
     // For testing
     Has(int) bool
@@ -32,6 +32,7 @@ type Interface interface {
     Allocate(net.IP) error
     AllocateNext() (net.IP, error)
     Release(net.IP) error
+    ForEach(func(net.IP))
 }
 
 var (
@@ -89,6 +90,19 @@ func NewCIDRRange(cidr *net.IPNet) *Range {
     })
 }
 
+// NewFromSnapshot allocates a Range and initializes it from a snapshot.
+func NewFromSnapshot(snap *api.RangeAllocation) (*Range, error) {
+    _, ipnet, err := net.ParseCIDR(snap.Range)
+    if err != nil {
+        return nil, err
+    }
+    r := NewCIDRRange(ipnet)
+    if err := r.Restore(ipnet, snap.Data); err != nil {
+        return nil, err
+    }
+    return r, nil
+}
+
 func maximum(a, b int) int {
     if a > b {
         return a
@@ -101,6 +115,16 @@ func (r *Range) Free() int {
     return r.alloc.Free()
 }
 
+// Used returns the count of IP addresses used in the range.
+func (r *Range) Used() int {
+    return r.max - r.alloc.Free()
+}
+
+// CIDR returns the CIDR covered by the range.
+func (r *Range) CIDR() net.IPNet {
+    return *r.net
+}
+
 // Allocate attempts to reserve the provided IP. ErrNotInRange or
 // ErrAllocated will be returned if the IP is not valid for this range
 // or has already been reserved. ErrFull will be returned if there
@@ -146,6 +170,14 @@ func (r *Range) Release(ip net.IP) error {
     return r.alloc.Release(offset)
 }
 
+// ForEach calls the provided function for each allocated IP.
+func (r *Range) ForEach(fn func(net.IP)) {
+    r.alloc.ForEach(func(offset int) {
+        ip, _ := GetIndexedIP(r.net, offset+1) // +1 because Range doesn't store IP 0
+        fn(ip)
+    })
+}
+
 // Has returns true if the provided IP is already allocated and a call
 // to Allocate(ip) would fail with ErrAllocated.
 func (r *Range) Has(ip net.IP) bool {
@@ -34,6 +34,9 @@ func TestAllocate(t *testing.T) {
     if f := r.Free(); f != 254 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 0 {
+        t.Errorf("unexpected used %d", f)
+    }
     found := sets.NewString()
     count := 0
     for r.Free() > 0 {
|
|||||||
if f := r.Free(); f != 1 {
|
if f := r.Free(); f != 1 {
|
||||||
t.Errorf("unexpected free %d", f)
|
t.Errorf("unexpected free %d", f)
|
||||||
}
|
}
|
||||||
|
if f := r.Used(); f != 253 {
|
||||||
|
t.Errorf("unexpected free %d", f)
|
||||||
|
}
|
||||||
ip, err := r.AllocateNext()
|
ip, err := r.AllocateNext()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@@ -87,12 +93,18 @@ func TestAllocate(t *testing.T) {
     if f := r.Free(); f != 1 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 253 {
+        t.Errorf("unexpected free %d", f)
+    }
     if err := r.Allocate(released); err != nil {
         t.Fatal(err)
     }
     if f := r.Free(); f != 0 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 254 {
+        t.Errorf("unexpected free %d", f)
+    }
 }
 
 func TestAllocateTiny(t *testing.T) {
@@ -167,6 +179,43 @@ func TestRangeSize(t *testing.T) {
     }
 }
 
+func TestForEach(t *testing.T) {
+    _, cidr, err := net.ParseCIDR("192.168.1.0/24")
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    testCases := []sets.String{
+        sets.NewString(),
+        sets.NewString("192.168.1.1"),
+        sets.NewString("192.168.1.1", "192.168.1.254"),
+        sets.NewString("192.168.1.1", "192.168.1.128", "192.168.1.254"),
+    }
+
+    for i, tc := range testCases {
+        r := NewCIDRRange(cidr)
+        for ips := range tc {
+            ip := net.ParseIP(ips)
+            if err := r.Allocate(ip); err != nil {
+                t.Errorf("[%d] error allocating IP %v: %v", i, ip, err)
+            }
+            if !r.Has(ip) {
+                t.Errorf("[%d] expected IP %v allocated", i, ip)
+            }
+        }
+        calls := sets.NewString()
+        r.ForEach(func(ip net.IP) {
+            calls.Insert(ip.String())
+        })
+        if len(calls) != len(tc) {
+            t.Errorf("[%d] expected %d calls, got %d", i, len(tc), len(calls))
+        }
+        if !calls.Equal(tc) {
+            t.Errorf("[%d] expected calls to equal testcase: %v vs %v", i, calls.List(), tc.List())
+        }
+    }
+}
+
 func TestSnapshot(t *testing.T) {
     _, cidr, err := net.ParseCIDR("192.168.1.0/24")
     if err != nil {
@@ -219,3 +268,42 @@ func TestSnapshot(t *testing.T) {
         t.Errorf("counts do not match: %d", other.Free())
     }
 }
+
+func TestNewFromSnapshot(t *testing.T) {
+    _, cidr, err := net.ParseCIDR("192.168.0.0/24")
+    if err != nil {
+        t.Fatal(err)
+    }
+    r := NewCIDRRange(cidr)
+    allocated := []net.IP{}
+    for i := 0; i < 128; i++ {
+        ip, err := r.AllocateNext()
+        if err != nil {
+            t.Fatal(err)
+        }
+        allocated = append(allocated, ip)
+    }
+
+    snapshot := api.RangeAllocation{}
+    if err = r.Snapshot(&snapshot); err != nil {
+        t.Fatal(err)
+    }
+
+    r, err = NewFromSnapshot(&snapshot)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    if x := r.Free(); x != 126 {
+        t.Fatalf("expected 126 free IPs, got %d", x)
+    }
+    if x := r.Used(); x != 128 {
+        t.Fatalf("expected 128 used IPs, got %d", x)
+    }
+
+    for _, ip := range allocated {
+        if !r.Has(ip) {
+            t.Fatalf("expected IP to be allocated, but it was not")
+        }
+    }
+}
@@ -51,8 +51,13 @@ type Repair struct {
     serviceClient coreclient.ServicesGetter
     network       *net.IPNet
     alloc         rangeallocation.RangeRegistry
+    leaks         map[string]int // counter per leaked IP
 }
 
+// How many times we need to detect a leak before we clean up. This is to
+// avoid races between allocating an IP and using it.
+const numRepairsBeforeLeakCleanup = 3
+
 // NewRepair creates a controller that periodically ensures that all clusterIPs are uniquely allocated across the cluster
 // and generates informational warnings for a cluster that is not in sync.
 func NewRepair(interval time.Duration, serviceClient coreclient.ServicesGetter, network *net.IPNet, alloc rangeallocation.RangeRegistry) *Repair {
@@ -61,6 +66,7 @@ func NewRepair(interval time.Duration, serviceClient coreclient.ServicesGetter,
         serviceClient: serviceClient,
         network:       network,
         alloc:         alloc,
+        leaks:         map[string]int{},
     }
 }
 
@@ -89,18 +95,27 @@ func (c *Repair) runOnce() error {
 
     // If etcd server is not running we should wait for some time and fail only then. This is particularly
     // important when we start apiserver and etcd at the same time.
-    var latest *api.RangeAllocation
-    var err error
-    err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
-        latest, err = c.alloc.Get()
+    var snapshot *api.RangeAllocation
+    err := wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
+        var err error
+        snapshot, err = c.alloc.Get()
         return err == nil, err
     })
     if err != nil {
         return fmt.Errorf("unable to refresh the service IP block: %v", err)
     }
+    // If not yet initialized.
+    if snapshot.Range == "" {
+        snapshot.Range = c.network.String()
+    }
+    // Create an allocator because it is easy to use.
+    stored, err := ipallocator.NewFromSnapshot(snapshot)
+    if err != nil {
+        return fmt.Errorf("unable to rebuild allocator from snapshot: %v", err)
+    }
 
     // We explicitly send no resource version, since the resource version
-    // of 'latest' is from a different collection, it's not comparable to
+    // of 'snapshot' is from a different collection, it's not comparable to
     // the service collection. The caching layer keeps per-collection RVs,
     // and this is proper, since in theory the collections could be hosted
     // in separate etcd (or even non-etcd) instances.
|
|||||||
return fmt.Errorf("unable to refresh the service IP block: %v", err)
|
return fmt.Errorf("unable to refresh the service IP block: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
r := ipallocator.NewCIDRRange(c.network)
|
rebuilt := ipallocator.NewCIDRRange(c.network)
|
||||||
|
// Check every Service's ClusterIP, and rebuild the state as we think it should be.
|
||||||
for _, svc := range list.Items {
|
for _, svc := range list.Items {
|
||||||
if !api.IsServiceIPSet(&svc) {
|
if !api.IsServiceIPSet(&svc) {
|
||||||
|
// didn't need a cluster IP
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ip := net.ParseIP(svc.Spec.ClusterIP)
|
ip := net.ParseIP(svc.Spec.ClusterIP)
|
||||||
if ip == nil {
|
if ip == nil {
|
||||||
// cluster IP is broken, reallocate
|
// cluster IP is corrupt
|
||||||
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not a valid IP; please recreate", svc.Spec.ClusterIP, svc.Name, svc.Namespace))
|
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not a valid IP; please recreate", svc.Spec.ClusterIP, svc.Name, svc.Namespace))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
switch err := r.Allocate(ip); err {
|
// mark it as in-use
|
||||||
|
switch err := rebuilt.Allocate(ip); err {
|
||||||
case nil:
|
case nil:
|
||||||
|
if stored.Has(ip) {
|
||||||
|
// remove it from the old set, so we can find leaks
|
||||||
|
stored.Release(ip)
|
||||||
|
} else {
|
||||||
|
// cluster IP doesn't seem to be allocated
|
||||||
|
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not allocated; repairing", svc.Spec.ClusterIP, svc.Name, svc.Namespace))
|
||||||
|
}
|
||||||
|
delete(c.leaks, ip.String()) // it is used, so it can't be leaked
|
||||||
case ipallocator.ErrAllocated:
|
case ipallocator.ErrAllocated:
|
||||||
// TODO: send event
|
// TODO: send event
|
||||||
// cluster IP is broken, reallocate
|
// cluster IP is duplicate
|
||||||
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s was assigned to multiple services; please recreate", ip, svc.Name, svc.Namespace))
|
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s was assigned to multiple services; please recreate", ip, svc.Name, svc.Namespace))
|
||||||
case ipallocator.ErrNotInRange:
|
case ipallocator.ErrNotInRange:
|
||||||
// TODO: send event
|
// TODO: send event
|
||||||
// cluster IP is broken, reallocate
|
// cluster IP is out of range
|
||||||
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not within the service CIDR %s; please recreate", ip, svc.Name, svc.Namespace, c.network))
|
runtime.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not within the service CIDR %s; please recreate", ip, svc.Name, svc.Namespace, c.network))
|
||||||
case ipallocator.ErrFull:
|
case ipallocator.ErrFull:
|
||||||
// TODO: send event
|
// TODO: send event
|
||||||
return fmt.Errorf("the service CIDR %v is full; you must widen the CIDR in order to create new services", r)
|
// somehow we are out of IPs
|
||||||
|
return fmt.Errorf("the service CIDR %v is full; you must widen the CIDR in order to create new services", rebuilt)
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unable to allocate cluster IP %s for service %s/%s due to an unknown error, exiting: %v", ip, svc.Name, svc.Namespace, err)
|
return fmt.Errorf("unable to allocate cluster IP %s for service %s/%s due to an unknown error, exiting: %v", ip, svc.Name, svc.Namespace, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := r.Snapshot(latest); err != nil {
|
// Check for IPs that are left in the old set. They appear to have been leaked.
|
||||||
|
stored.ForEach(func(ip net.IP) {
|
||||||
|
count, found := c.leaks[ip.String()]
|
||||||
|
switch {
|
||||||
|
case !found:
|
||||||
|
// flag it to be cleaned up after any races (hopefully) are gone
|
||||||
|
runtime.HandleError(fmt.Errorf("the cluster IP %s may have leaked: flagging for later clean up", ip))
|
||||||
|
count = numRepairsBeforeLeakCleanup - 1
|
||||||
|
fallthrough
|
||||||
|
case count > 0:
|
||||||
|
// pretend it is still in use until count expires
|
||||||
|
c.leaks[ip.String()] = count - 1
|
||||||
|
if err := rebuilt.Allocate(ip); err != nil {
|
||||||
|
runtime.HandleError(fmt.Errorf("the cluster IP %s may have leaked, but can not be allocated: %v", ip, err))
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// do not add it to the rebuilt set, which means it will be available for reuse
|
||||||
|
runtime.HandleError(fmt.Errorf("the cluster IP %s appears to have leaked: cleaning up", ip))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Blast the rebuilt state into storage.
|
||||||
|
if err := rebuilt.Snapshot(snapshot); err != nil {
|
||||||
return fmt.Errorf("unable to snapshot the updated service IP allocations: %v", err)
|
return fmt.Errorf("unable to snapshot the updated service IP allocations: %v", err)
|
||||||
}
|
}
|
||||||
|
if err := c.alloc.CreateOrUpdate(snapshot); err != nil {
|
||||||
if err := c.alloc.CreateOrUpdate(latest); err != nil {
|
|
||||||
if errors.IsConflict(err) {
|
if errors.IsConflict(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@@ -50,10 +50,10 @@ func (r *mockRangeRegistry) CreateOrUpdate(alloc *api.RangeAllocation) error {
 
 func TestRepair(t *testing.T) {
     fakeClient := fake.NewSimpleClientset()
-    _, cidr, _ := net.ParseCIDR("192.168.1.0/24")
     ipregistry := &mockRangeRegistry{
-        item: &api.RangeAllocation{},
+        item: &api.RangeAllocation{Range: "192.168.1.0/24"},
     }
+    _, cidr, _ := net.ParseCIDR(ipregistry.item.Range)
     r := NewRepair(0, fakeClient.Core(), cidr, ipregistry)
 
     if err := r.RunOnce(); err != nil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ipregistry = &mockRangeRegistry{
|
ipregistry = &mockRangeRegistry{
|
||||||
item: &api.RangeAllocation{},
|
item: &api.RangeAllocation{Range: "192.168.1.0/24"},
|
||||||
updateErr: fmt.Errorf("test error"),
|
updateErr: fmt.Errorf("test error"),
|
||||||
}
|
}
|
||||||
r = NewRepair(0, fakeClient.Core(), cidr, ipregistry)
|
r = NewRepair(0, fakeClient.Core(), cidr, ipregistry)
|
||||||
@ -73,7 +73,7 @@ func TestRepair(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRepairEmpty(t *testing.T) {
|
func TestRepairLeak(t *testing.T) {
|
||||||
_, cidr, _ := net.ParseCIDR("192.168.1.0/24")
|
_, cidr, _ := net.ParseCIDR("192.168.1.0/24")
|
||||||
previous := ipallocator.NewCIDRRange(cidr)
|
previous := ipallocator.NewCIDRRange(cidr)
|
||||||
previous.Allocate(net.ParseIP("192.168.1.10"))
|
previous.Allocate(net.ParseIP("192.168.1.10"))
|
||||||
@@ -94,16 +94,31 @@ func TestRepairEmpty(t *testing.T) {
             Data:  dst.Data,
         },
     }
 
     r := NewRepair(0, fakeClient.Core(), cidr, ipregistry)
+    // Run through the "leak detection holdoff" loops.
+    for i := 0; i < (numRepairsBeforeLeakCleanup - 1); i++ {
         if err := r.RunOnce(); err != nil {
             t.Fatal(err)
         }
-    after := ipallocator.NewCIDRRange(cidr)
-    if err := after.Restore(cidr, ipregistry.updated.Data); err != nil {
+        after, err := ipallocator.NewFromSnapshot(ipregistry.updated)
+        if err != nil {
+            t.Fatal(err)
+        }
+        if !after.Has(net.ParseIP("192.168.1.10")) {
+            t.Errorf("expected ipallocator to still have leaked IP")
+        }
+    }
+    // Run one more time to actually remove the leak.
+    if err := r.RunOnce(); err != nil {
+        t.Fatal(err)
+    }
+    after, err := ipallocator.NewFromSnapshot(ipregistry.updated)
+    if err != nil {
         t.Fatal(err)
     }
     if after.Has(net.ParseIP("192.168.1.10")) {
-        t.Errorf("unexpected ipallocator state: %#v", after)
+        t.Errorf("expected ipallocator to not have leaked IP")
     }
 }
@@ -157,14 +172,14 @@ func TestRepairWithExisting(t *testing.T) {
     if err := r.RunOnce(); err != nil {
         t.Fatal(err)
     }
-    after := ipallocator.NewCIDRRange(cidr)
-    if err := after.Restore(cidr, ipregistry.updated.Data); err != nil {
+    after, err := ipallocator.NewFromSnapshot(ipregistry.updated)
+    if err != nil {
         t.Fatal(err)
     }
     if !after.Has(net.ParseIP("192.168.1.1")) || !after.Has(net.ParseIP("192.168.1.100")) {
         t.Errorf("unexpected ipallocator state: %#v", after)
     }
-    if after.Free() != 252 {
-        t.Errorf("unexpected ipallocator state: %#v", after)
+    if free := after.Free(); free != 252 {
+        t.Errorf("unexpected ipallocator state: %d free", free)
     }
 }
@@ -33,6 +33,7 @@ type Interface interface {
     Allocate(int) error
     AllocateNext() (int, error)
     Release(int) error
+    ForEach(func(int))
 }
 
 var (
@@ -77,11 +78,29 @@ func NewPortAllocator(pr net.PortRange) *PortAllocator {
     })
 }
 
+// NewFromSnapshot allocates a PortAllocator and initializes it from a snapshot.
+func NewFromSnapshot(snap *api.RangeAllocation) (*PortAllocator, error) {
+    pr, err := net.ParsePortRange(snap.Range)
+    if err != nil {
+        return nil, err
+    }
+    r := NewPortAllocator(*pr)
+    if err := r.Restore(*pr, snap.Data); err != nil {
+        return nil, err
+    }
+    return r, nil
+}
+
 // Free returns the count of port left in the range.
 func (r *PortAllocator) Free() int {
     return r.alloc.Free()
 }
 
+// Used returns the count of ports used in the range.
+func (r *PortAllocator) Used() int {
+    return r.portRange.Size - r.alloc.Free()
+}
+
 // Allocate attempts to reserve the provided port. ErrNotInRange or
 // ErrAllocated will be returned if the port is not valid for this range
 // or has already been reserved. ErrFull will be returned if there
@@ -117,6 +136,13 @@ func (r *PortAllocator) AllocateNext() (int, error) {
     return r.portRange.Base + offset, nil
 }
 
+// ForEach calls the provided function for each allocated port.
+func (r *PortAllocator) ForEach(fn func(int)) {
+    r.alloc.ForEach(func(offset int) {
+        fn(r.portRange.Base + offset)
+    })
+}
+
 // Release releases the port back to the pool. Releasing an
 // unallocated port or a port out of the range is a no-op and
 // returns no error.
@@ -35,6 +35,9 @@ func TestAllocate(t *testing.T) {
     if f := r.Free(); f != 201 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 0 {
+        t.Errorf("unexpected used %d", f)
+    }
     found := sets.NewString()
     count := 0
     for r.Free() > 0 {
@@ -62,6 +65,9 @@ func TestAllocate(t *testing.T) {
     if f := r.Free(); f != 1 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 200 {
+        t.Errorf("unexpected used %d", f)
+    }
     p, err := r.AllocateNext()
     if err != nil {
         t.Fatal(err)
@@ -95,12 +101,56 @@ func TestAllocate(t *testing.T) {
     if f := r.Free(); f != 1 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 200 {
+        t.Errorf("unexpected used %d", f)
+    }
     if err := r.Allocate(released); err != nil {
         t.Fatal(err)
     }
     if f := r.Free(); f != 0 {
         t.Errorf("unexpected free %d", f)
     }
+    if f := r.Used(); f != 201 {
+        t.Errorf("unexpected used %d", f)
+    }
+}
+
+func TestForEach(t *testing.T) {
+    pr, err := net.ParsePortRange("10000-10200")
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    testCases := []sets.Int{
+        sets.NewInt(),
+        sets.NewInt(10000),
+        sets.NewInt(10000, 10200),
+        sets.NewInt(10000, 10099, 10200),
+    }
+
+    for i, tc := range testCases {
+        r := NewPortAllocator(*pr)
+
+        for port := range tc {
+            if err := r.Allocate(port); err != nil {
+                t.Errorf("[%d] error allocating port %v: %v", i, port, err)
+            }
+            if !r.Has(port) {
+                t.Errorf("[%d] expected port %v allocated", i, port)
+            }
+        }
+
+        calls := sets.NewInt()
+        r.ForEach(func(port int) {
+            calls.Insert(port)
+        })
+        if len(calls) != len(tc) {
+            t.Errorf("[%d] expected %d calls, got %d", i, len(tc), len(calls))
+        }
+        if !calls.Equal(tc) {
+            t.Errorf("[%d] expected calls to equal testcase: %v vs %v", i, calls.List(), tc.List())
+        }
+    }
 }
 
 func TestSnapshot(t *testing.T) {
@@ -155,3 +205,42 @@ func TestSnapshot(t *testing.T) {
         t.Errorf("counts do not match: %d", other.Free())
     }
 }
+
+func TestNewFromSnapshot(t *testing.T) {
+    pr, err := net.ParsePortRange("200-300")
+    if err != nil {
+        t.Fatal(err)
+    }
+    r := NewPortAllocator(*pr)
+    allocated := []int{}
+    for i := 0; i < 50; i++ {
+        p, err := r.AllocateNext()
+        if err != nil {
+            t.Fatal(err)
+        }
+        allocated = append(allocated, p)
+    }
+
+    snapshot := api.RangeAllocation{}
+    if err = r.Snapshot(&snapshot); err != nil {
+        t.Fatal(err)
+    }
+
+    r, err = NewFromSnapshot(&snapshot)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    if x := r.Free(); x != 51 {
+        t.Fatalf("expected 51 free ports, got %d", x)
+    }
+    if x := r.Used(); x != 50 {
+        t.Fatalf("expected 50 used port, got %d", x)
+    }
+
+    for _, p := range allocated {
+        if !r.Has(p) {
+            t.Fatalf("expected port to be allocated, but it was not")
+        }
+    }
+}
@@ -5,6 +5,7 @@ licenses(["notice"])
 load(
     "@io_bazel_rules_go//go:def.bzl",
     "go_library",
+    "go_test",
 )
 
 go_library(
@@ -24,3 +25,16 @@ go_library(
         "//pkg/util/wait:go_default_library",
     ],
 )
+
+go_test(
+    name = "go_default_test",
+    srcs = ["repair_test.go"],
+    library = "go_default_library",
+    tags = ["automanaged"],
+    deps = [
+        "//pkg/api:go_default_library",
+        "//pkg/client/clientset_generated/internalclientset/fake:go_default_library",
+        "//pkg/registry/core/service/portallocator:go_default_library",
+        "//pkg/util/net:go_default_library",
+    ],
+)
@@ -38,8 +38,13 @@ type Repair struct {
     serviceClient coreclient.ServicesGetter
     portRange     net.PortRange
     alloc         rangeallocation.RangeRegistry
+    leaks         map[int]int // counter per leaked port
 }
 
+// How many times we need to detect a leak before we clean up. This is to
+// avoid races between allocating a ports and using it.
+const numRepairsBeforeLeakCleanup = 3
+
 // NewRepair creates a controller that periodically ensures that all ports are uniquely allocated across the cluster
 // and generates informational warnings for a cluster that is not in sync.
 func NewRepair(interval time.Duration, serviceClient coreclient.ServicesGetter, portRange net.PortRange, alloc rangeallocation.RangeRegistry) *Repair {
@@ -48,6 +53,7 @@ func NewRepair(interval time.Duration, serviceClient coreclient.ServicesGetter,
         serviceClient: serviceClient,
         portRange:     portRange,
         alloc:         alloc,
+        leaks:         map[int]int{},
     }
 }
 
|
|||||||
|
|
||||||
// If etcd server is not running we should wait for some time and fail only then. This is particularly
|
// If etcd server is not running we should wait for some time and fail only then. This is particularly
|
||||||
// important when we start apiserver and etcd at the same time.
|
// important when we start apiserver and etcd at the same time.
|
||||||
var latest *api.RangeAllocation
|
var snapshot *api.RangeAllocation
|
||||||
|
|
||||||
|
err := wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
|
||||||
var err error
|
var err error
|
||||||
for i := 0; i < 10; i++ {
|
snapshot, err = c.alloc.Get()
|
||||||
if latest, err = c.alloc.Get(); err != nil {
|
return err == nil, err
|
||||||
time.Sleep(time.Second)
|
})
|
||||||
} else {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to refresh the port block: %v", err)
|
return fmt.Errorf("unable to refresh the port allocations: %v", err)
|
||||||
|
}
|
||||||
|
// If not yet initialized.
|
||||||
|
if snapshot.Range == "" {
|
||||||
|
snapshot.Range = c.portRange.String()
|
||||||
|
}
|
||||||
|
// Create an allocator because it is easy to use.
|
||||||
|
stored, err := portallocator.NewFromSnapshot(snapshot)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to rebuild allocator from snapshot: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// We explicitly send no resource version, since the resource version
|
// We explicitly send no resource version, since the resource version
|
||||||
// of 'latest' is from a different collection, it's not comparable to
|
// of 'snapshot' is from a different collection, it's not comparable to
|
||||||
// the service collection. The caching layer keeps per-collection RVs,
|
// the service collection. The caching layer keeps per-collection RVs,
|
||||||
// and this is proper, since in theory the collections could be hosted
|
// and this is proper, since in theory the collections could be hosted
|
||||||
// in separate etcd (or even non-etcd) instances.
|
// in separate etcd (or even non-etcd) instances.
|
||||||
@ -99,7 +112,8 @@ func (c *Repair) runOnce() error {
|
|||||||
return fmt.Errorf("unable to refresh the port block: %v", err)
|
return fmt.Errorf("unable to refresh the port block: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
r := portallocator.NewPortAllocator(c.portRange)
|
rebuilt := portallocator.NewPortAllocator(c.portRange)
|
||||||
|
// Check every Service's ports, and rebuild the state as we think it should be.
|
||||||
for i := range list.Items {
|
for i := range list.Items {
|
||||||
svc := &list.Items[i]
|
svc := &list.Items[i]
|
||||||
ports := service.CollectServiceNodePorts(svc)
|
ports := service.CollectServiceNodePorts(svc)
|
||||||
@@ -108,18 +122,27 @@ func (c *Repair) runOnce() error {
         }
 
         for _, port := range ports {
-            switch err := r.Allocate(port); err {
+            switch err := rebuilt.Allocate(port); err {
             case nil:
+                if stored.Has(port) {
+                    // remove it from the old set, so we can find leaks
+                    stored.Release(port)
+                } else {
+                    // doesn't seem to be allocated
+                    runtime.HandleError(fmt.Errorf("the node port %d for service %s/%s is not allocated; repairing", port, svc.Name, svc.Namespace))
+                }
+                delete(c.leaks, port) // it is used, so it can't be leaked
             case portallocator.ErrAllocated:
                 // TODO: send event
-                // port is broken, reallocate
-                runtime.HandleError(fmt.Errorf("the port %d for service %s/%s was assigned to multiple services; please recreate", port, svc.Name, svc.Namespace))
+                // port is duplicate, reallocate
+                runtime.HandleError(fmt.Errorf("the node port %d for service %s/%s was assigned to multiple services; please recreate", port, svc.Name, svc.Namespace))
             case err.(*portallocator.ErrNotInRange):
                 // TODO: send event
-                // port is broken, reallocate
+                // port is out of range, reallocate
                 runtime.HandleError(fmt.Errorf("the port %d for service %s/%s is not within the port range %v; please recreate", port, svc.Name, svc.Namespace, c.portRange))
             case portallocator.ErrFull:
                 // TODO: send event
+                // somehow we are out of ports
                 return fmt.Errorf("the port range %v is full; you must widen the port range in order to create new services", c.portRange)
             default:
                 return fmt.Errorf("unable to allocate port %d for service %s/%s due to an unknown error, exiting: %v", port, svc.Name, svc.Namespace, err)
@@ -127,12 +150,33 @@ func (c *Repair) runOnce() error {
         }
     }
 
-    err = r.Snapshot(latest)
-    if err != nil {
+    // Check for ports that are left in the old set. They appear to have been leaked.
+    stored.ForEach(func(port int) {
+        count, found := c.leaks[port]
+        switch {
+        case !found:
+            // flag it to be cleaned up after any races (hopefully) are gone
+            runtime.HandleError(fmt.Errorf("the node port %d may have leaked: flagging for later clean up", port))
+            count = numRepairsBeforeLeakCleanup - 1
+            fallthrough
+        case count > 0:
+            // pretend it is still in use until count expires
+            c.leaks[port] = count - 1
+            if err := rebuilt.Allocate(port); err != nil {
+                runtime.HandleError(fmt.Errorf("the node port %d may have leaked, but can not be allocated: %v", port, err))
+            }
+        default:
+            // do not add it to the rebuilt set, which means it will be available for reuse
+            runtime.HandleError(fmt.Errorf("the node port %d appears to have leaked: cleaning up", port))
+        }
+    })
+
+    // Blast the rebuilt state into storage.
+    if err := rebuilt.Snapshot(snapshot); err != nil {
         return fmt.Errorf("unable to snapshot the updated port allocations: %v", err)
     }
 
-    if err := c.alloc.CreateOrUpdate(latest); err != nil {
+    if err := c.alloc.CreateOrUpdate(snapshot); err != nil {
         if errors.IsConflict(err) {
             return err
         }
@@ -0,0 +1,191 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+    "fmt"
+    "strings"
+    "testing"
+
+    "k8s.io/kubernetes/pkg/api"
+    "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
+    "k8s.io/kubernetes/pkg/registry/core/service/portallocator"
+    "k8s.io/kubernetes/pkg/util/net"
+)
+
+type mockRangeRegistry struct {
+    getCalled bool
+    item      *api.RangeAllocation
+    err       error
+
+    updateCalled bool
+    updated      *api.RangeAllocation
+    updateErr    error
+}
+
+func (r *mockRangeRegistry) Get() (*api.RangeAllocation, error) {
+    r.getCalled = true
+    return r.item, r.err
+}
+
+func (r *mockRangeRegistry) CreateOrUpdate(alloc *api.RangeAllocation) error {
+    r.updateCalled = true
+    r.updated = alloc
+    return r.updateErr
+}
+
+func TestRepair(t *testing.T) {
+    fakeClient := fake.NewSimpleClientset()
+    registry := &mockRangeRegistry{
+        item: &api.RangeAllocation{Range: "100-200"},
+    }
+    pr, _ := net.ParsePortRange(registry.item.Range)
+    r := NewRepair(0, fakeClient.Core(), *pr, registry)
+
+    if err := r.RunOnce(); err != nil {
+        t.Fatal(err)
+    }
+    if !registry.updateCalled || registry.updated == nil || registry.updated.Range != pr.String() || registry.updated != registry.item {
+        t.Errorf("unexpected registry: %#v", registry)
+    }
+
+    registry = &mockRangeRegistry{
+        item:      &api.RangeAllocation{Range: "100-200"},
+        updateErr: fmt.Errorf("test error"),
+    }
+    r = NewRepair(0, fakeClient.Core(), *pr, registry)
+    if err := r.RunOnce(); !strings.Contains(err.Error(), ": test error") {
+        t.Fatal(err)
+    }
+}
+
+func TestRepairLeak(t *testing.T) {
+    pr, _ := net.ParsePortRange("100-200")
+    previous := portallocator.NewPortAllocator(*pr)
+    previous.Allocate(111)
+
+    var dst api.RangeAllocation
+    err := previous.Snapshot(&dst)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    fakeClient := fake.NewSimpleClientset()
+    registry := &mockRangeRegistry{
+        item: &api.RangeAllocation{
+            ObjectMeta: api.ObjectMeta{
+                ResourceVersion: "1",
+            },
+            Range: dst.Range,
+            Data:  dst.Data,
+        },
+    }
+
+    r := NewRepair(0, fakeClient.Core(), *pr, registry)
+    // Run through the "leak detection holdoff" loops.
+    for i := 0; i < (numRepairsBeforeLeakCleanup - 1); i++ {
+        if err := r.RunOnce(); err != nil {
+            t.Fatal(err)
+        }
+        after, err := portallocator.NewFromSnapshot(registry.updated)
+        if err != nil {
+            t.Fatal(err)
+        }
+        if !after.Has(111) {
+            t.Errorf("expected portallocator to still have leaked port")
+        }
+    }
+    // Run one more time to actually remove the leak.
+    if err := r.RunOnce(); err != nil {
+        t.Fatal(err)
+    }
+    after, err := portallocator.NewFromSnapshot(registry.updated)
+    if err != nil {
+        t.Fatal(err)
+    }
+    if after.Has(111) {
+        t.Errorf("expected portallocator to not have leaked port")
+    }
+}
+
+func TestRepairWithExisting(t *testing.T) {
+    pr, _ := net.ParsePortRange("100-200")
+    previous := portallocator.NewPortAllocator(*pr)
+
+    var dst api.RangeAllocation
+    err := previous.Snapshot(&dst)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    fakeClient := fake.NewSimpleClientset(
+        &api.Service{
+            ObjectMeta: api.ObjectMeta{Namespace: "one", Name: "one"},
+            Spec: api.ServiceSpec{
+                Ports: []api.ServicePort{{NodePort: 111}},
+            },
+        },
+        &api.Service{
+            ObjectMeta: api.ObjectMeta{Namespace: "two", Name: "two"},
+            Spec: api.ServiceSpec{
+                Ports: []api.ServicePort{{NodePort: 122}, {NodePort: 133}},
+            },
+        },
+        &api.Service{ // outside range, will be dropped
+            ObjectMeta: api.ObjectMeta{Namespace: "three", Name: "three"},
+            Spec: api.ServiceSpec{
+                Ports: []api.ServicePort{{NodePort: 201}},
+            },
+        },
+        &api.Service{ // empty, ignored
+            ObjectMeta: api.ObjectMeta{Namespace: "four", Name: "four"},
+            Spec: api.ServiceSpec{
+                Ports: []api.ServicePort{{}},
+            },
+        },
+        &api.Service{ // duplicate, dropped
+            ObjectMeta: api.ObjectMeta{Namespace: "five", Name: "five"},
+            Spec: api.ServiceSpec{
+                Ports: []api.ServicePort{{NodePort: 111}},
+            },
+        },
+    )
+
+    registry := &mockRangeRegistry{
+        item: &api.RangeAllocation{
+            ObjectMeta: api.ObjectMeta{
+                ResourceVersion: "1",
+            },
+            Range: dst.Range,
+            Data:  dst.Data,
+        },
+    }
+    r := NewRepair(0, fakeClient.Core(), *pr, registry)
+    if err := r.RunOnce(); err != nil {
+        t.Fatal(err)
+    }
+    after, err := portallocator.NewFromSnapshot(registry.updated)
+    if err != nil {
+        t.Fatal(err)
+    }
+    if !after.Has(111) || !after.Has(122) || !after.Has(133) {
+        t.Errorf("unexpected portallocator state: %#v", after)
+    }
+    if free := after.Free(); free != 98 {
+        t.Errorf("unexpected portallocator state: %d free", free)
+    }
+}
@@ -25,7 +25,6 @@ go_library(
         "//pkg/apis/storage/v1beta1/util:go_default_library",
         "//pkg/client/clientset_generated/clientset:go_default_library",
         "//pkg/labels:go_default_library",
-        "//pkg/registry/core/service/allocator:go_default_library",
        "//pkg/types:go_default_library",
        "//pkg/util/exec:go_default_library",
        "//pkg/util/mount:go_default_library",
@@ -25,8 +25,6 @@ package glusterfs
 import (
     "errors"
     "sync"
-
-    "k8s.io/kubernetes/pkg/registry/core/service/allocator"
 )
 
 var (
@@ -51,7 +49,11 @@ var _ Rangeable = &MinMaxAllocator{}
 // Rangeable is an Interface that can adjust its min/max range.
 // Rangeable should be threadsafe
 type Rangeable interface {
-    allocator.Interface
+    Allocate(int) (bool, error)
+    AllocateNext() (int, bool, error)
+    Release(int) error
+    Has(int) bool
+    Free() int
     SetRange(min, max int) error
 }
 