diff --git a/pkg/util/deadlock-detector.go b/pkg/util/deadlock-detector.go index 8b241ccf968..b89fb23de3e 100644 --- a/pkg/util/deadlock-detector.go +++ b/pkg/util/deadlock-detector.go @@ -24,11 +24,6 @@ import ( "github.com/golang/glog" ) -type Lockable interface { - Lock() - Unlock() -} - type rwMutexToLockableAdapter struct { rw *sync.RWMutex } @@ -43,9 +38,12 @@ func (r *rwMutexToLockableAdapter) Unlock() { type deadlockDetector struct { name string - lock Lockable + lock sync.Locker maxLockPeriod time.Duration - clock Clock + exiter exiter + exitChannelFn func() <-chan time.Time + // Really only useful for testing + stopChannel <-chan bool } // DeadlockWatchdogReadLock creates a watchdog on read/write mutex. If the mutex can not be acquired @@ -55,37 +53,68 @@ func DeadlockWatchdogReadLock(lock *sync.RWMutex, name string, maxLockPeriod tim DeadlockWatchdog(&rwMutexToLockableAdapter{lock}, name, maxLockPeriod) } -func DeadlockWatchdog(lock Lockable, name string, maxLockPeriod time.Duration) { +func DeadlockWatchdog(lock sync.Locker, name string, maxLockPeriod time.Duration) { + if maxLockPeriod <= 0 { + panic("maxLockPeriod is <= 0, that can't be what you wanted") + } detector := &deadlockDetector{ - lock: lock, - name: name, - clock: RealClock{}, + lock: lock, + name: name, + maxLockPeriod: maxLockPeriod, + exitChannelFn: func() <-chan time.Time { return time.After(maxLockPeriod) }, + stopChannel: make(chan bool), } go detector.run() } +// Useful for injecting tests +type exiter interface { + Exitf(format string, args ...interface{}) +} + +type realExiter struct{} + +func (realExiter) Exitf(format string, args ...interface{}) { + func() { + defer func() { + // Let's just be extra sure we die, even if Exitf panics + if r := recover(); r != nil { + glog.Errorf(format, args...) + os.Exit(2) + } + }() + glog.Exitf(format, args...) + }() +} + func (d *deadlockDetector) run() { for { - ch := make(chan bool, 1) - go func() { - d.lock.Lock() - d.lock.Unlock() - - ch <- true - }() - select { - case <-time.After(d.maxLockPeriod): - go func() { - defer func() { - // Let's just be extra sure we die, even if Exitf panics - glog.Errorf("Failed to Exitf for %s, dying anyway", d.name) - os.Exit(2) - }() - glog.Exitf("Deadlock on %s, exiting", d.name) - }() - case <-ch: - glog.V(6).Infof("%s is not deadlocked", d.name) + if !d.runOnce() { + return } time.Sleep(d.maxLockPeriod / 2) } } + +func (d *deadlockDetector) runOnce() bool { + ch := make(chan bool, 1) + go func() { + d.lock.Lock() + d.lock.Unlock() + + ch <- true + }() + exitCh := d.exitChannelFn() + select { + case <-exitCh: + d.exiter.Exitf("Deadlock on %s, exiting", d.name) + // return is here for when we use a fake exiter in testing + return false + case <-ch: + glog.V(6).Infof("%s is not deadlocked", d.name) + case <-d.stopChannel: + glog.V(4).Infof("Stopping deadlock detector for %s", d.name) + return false + } + return true +} diff --git a/pkg/util/deadlock-detector_test.go b/pkg/util/deadlock-detector_test.go new file mode 100644 index 00000000000..51d11124583 --- /dev/null +++ b/pkg/util/deadlock-detector_test.go @@ -0,0 +1,127 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "sync" + "testing" + "time" +) + +type fakeExiter struct { + format string + args []interface{} + exited bool +} + +func (f *fakeExiter) Exitf(format string, args ...interface{}) { + f.format = format + f.args = args + f.exited = true +} + +func TestMaxLockPeriod(t *testing.T) { + lock := &sync.RWMutex{} + panicked := false + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + DeadlockWatchdogReadLock(lock, "test lock", 0) + }() + if !panicked { + t.Errorf("expected a panic for a zero max lock period") + } +} + +func TestDeadlockWatchdogLocked(t *testing.T) { + lock := &sync.RWMutex{} + lock.Lock() + + exitCh := make(chan time.Time, 1) + fake := fakeExiter{} + + detector := &deadlockDetector{ + lock: &rwMutexToLockableAdapter{lock}, + name: "test deadlock", + exitChannelFn: func() <-chan time.Time { return exitCh }, + exiter: &fake, + } + + exitCh <- time.Time{} + + detector.run() + + if !fake.exited { + t.Errorf("expected to have exited") + } + + if len(fake.args) != 1 || fake.args[0].(string) != detector.name { + t.Errorf("unexpected args: %v", fake.args) + } +} + +func TestDeadlockWatchdogUnlocked(t *testing.T) { + lock := &sync.RWMutex{} + + fake := fakeExiter{} + + detector := &deadlockDetector{ + lock: &rwMutexToLockableAdapter{lock}, + name: "test deadlock", + exitChannelFn: func() <-chan time.Time { return time.After(time.Second * 5) }, + exiter: &fake, + } + + for i := 0; i < 100; i++ { + detector.runOnce() + } + + if fake.exited { + t.Errorf("expected to have not exited") + } +} + +func TestDeadlockWatchdogLocking(t *testing.T) { + lock := &sync.RWMutex{} + + fake := fakeExiter{} + + go func() { + for { + lock.Lock() + lock.Unlock() + } + }() + + detector := &deadlockDetector{ + lock: &rwMutexToLockableAdapter{lock}, + name: "test deadlock", + exitChannelFn: func() <-chan time.Time { return time.After(time.Second * 5) }, + exiter: &fake, + } + + for i := 0; i < 100; i++ { + detector.runOnce() + } + + if fake.exited { + t.Errorf("expected to have not exited") + } +}