Fix Panic Condition

Currenlty an event recorder can send an event to a
broadcaster that is already stopped, resulting
in a panic.  This ensures the broadcaster holds
a lock while it is shutting down and then forces
any senders to drop queued events following
broadcaster shutdown.

It also updates the Action, ActionOrDrop,  Watch,
and WatchWithPrefix functions to return an error
in the case where data is sent on the closed bradcaster
channel rather than panicing.

Lastly it updates unit tests to ensure the fix works correctly

fixes: https://github.com/kubernetes/kubernetes/issues/108518

Signed-off-by: Andrew Stoycos <astoycos@redhat.com>
This commit is contained in:
Andrew Stoycos 2022-02-11 14:50:19 -05:00
parent 4717a59097
commit 6aa779f4ed
5 changed files with 113 additions and 54 deletions

View File

@ -17,6 +17,7 @@ limitations under the License.
package watch package watch
import ( import (
"fmt"
"sync" "sync"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
@ -44,8 +45,11 @@ type Broadcaster struct {
nextWatcher int64 nextWatcher int64
distributing sync.WaitGroup distributing sync.WaitGroup
incoming chan Event // incomingBlock allows us to ensure we don't race and end up sending events
stopped chan struct{} // to a closed channel following a brodcaster shutdown.
incomingBlock sync.Mutex
incoming chan Event
stopped chan struct{}
// How large to make watcher's channel. // How large to make watcher's channel.
watchQueueLength int watchQueueLength int
@ -132,7 +136,7 @@ func (m *Broadcaster) blockQueue(f func()) {
// Note: new watchers will only receive new events. They won't get an entire history // Note: new watchers will only receive new events. They won't get an entire history
// of previous events. It will block until the watcher is actually added to the // of previous events. It will block until the watcher is actually added to the
// broadcaster. // broadcaster.
func (m *Broadcaster) Watch() Interface { func (m *Broadcaster) Watch() (Interface, error) {
var w *broadcasterWatcher var w *broadcasterWatcher
m.blockQueue(func() { m.blockQueue(func() {
id := m.nextWatcher id := m.nextWatcher
@ -146,11 +150,9 @@ func (m *Broadcaster) Watch() Interface {
m.watchers[id] = w m.watchers[id] = w
}) })
if w == nil { if w == nil {
// The panic here is to be consistent with the previous interface behavior return nil, fmt.Errorf("broadcaster already stopped")
// we are willing to re-evaluate in the future.
panic("broadcaster already stopped")
} }
return w return w, nil
} }
// WatchWithPrefix adds a new watcher to the list and returns an Interface for it. It sends // WatchWithPrefix adds a new watcher to the list and returns an Interface for it. It sends
@ -158,7 +160,7 @@ func (m *Broadcaster) Watch() Interface {
// The returned watch will have a queue length that is at least large enough to accommodate // The returned watch will have a queue length that is at least large enough to accommodate
// all of the items in queuedEvents. It will block until the watcher is actually added to // all of the items in queuedEvents. It will block until the watcher is actually added to
// the broadcaster. // the broadcaster.
func (m *Broadcaster) WatchWithPrefix(queuedEvents []Event) Interface { func (m *Broadcaster) WatchWithPrefix(queuedEvents []Event) (Interface, error) {
var w *broadcasterWatcher var w *broadcasterWatcher
m.blockQueue(func() { m.blockQueue(func() {
id := m.nextWatcher id := m.nextWatcher
@ -179,11 +181,9 @@ func (m *Broadcaster) WatchWithPrefix(queuedEvents []Event) Interface {
} }
}) })
if w == nil { if w == nil {
// The panic here is to be consistent with the previous interface behavior return nil, fmt.Errorf("broadcaster already stopped")
// we are willing to re-evaluate in the future.
panic("broadcaster already stopped")
} }
return w return w, nil
} }
// stopWatching stops the given watcher and removes it from the list. // stopWatching stops the given watcher and removes it from the list.
@ -210,19 +210,38 @@ func (m *Broadcaster) closeAll() {
} }
// Action distributes the given event among all watchers. // Action distributes the given event among all watchers.
func (m *Broadcaster) Action(action EventType, obj runtime.Object) { func (m *Broadcaster) Action(action EventType, obj runtime.Object) error {
m.incomingBlock.Lock()
defer m.incomingBlock.Unlock()
select {
case <-m.stopped:
return fmt.Errorf("broadcaster already stopped")
default:
}
m.incoming <- Event{action, obj} m.incoming <- Event{action, obj}
return nil
} }
// Action distributes the given event among all watchers, or drops it on the floor // Action distributes the given event among all watchers, or drops it on the floor
// if too many incoming actions are queued up. Returns true if the action was sent, // if too many incoming actions are queued up. Returns true if the action was sent,
// false if dropped. // false if dropped.
func (m *Broadcaster) ActionOrDrop(action EventType, obj runtime.Object) bool { func (m *Broadcaster) ActionOrDrop(action EventType, obj runtime.Object) (bool, error) {
m.incomingBlock.Lock()
defer m.incomingBlock.Unlock()
// Ensure that if the broadcaster is stopped we do not send events to it.
select {
case <-m.stopped:
return false, fmt.Errorf("broadcaster already stopped")
default:
}
select { select {
case m.incoming <- Event{action, obj}: case m.incoming <- Event{action, obj}:
return true return true, nil
default: default:
return false return false, nil
} }
} }
@ -233,6 +252,8 @@ func (m *Broadcaster) ActionOrDrop(action EventType, obj runtime.Object) bool {
// have received the data yet as it can remain sitting in the buffered // have received the data yet as it can remain sitting in the buffered
// channel. It will block until the broadcaster stop request is actually executed // channel. It will block until the broadcaster stop request is actually executed
func (m *Broadcaster) Shutdown() { func (m *Broadcaster) Shutdown() {
m.incomingBlock.Lock()
defer m.incomingBlock.Unlock()
m.blockQueue(func() { m.blockQueue(func() {
close(m.stopped) close(m.stopped)
close(m.incoming) close(m.incoming)

View File

@ -22,6 +22,7 @@ import (
"testing" "testing"
"time" "time"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
@ -58,6 +59,10 @@ func TestBroadcaster(t *testing.T) {
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
wg.Add(testWatchers) wg.Add(testWatchers)
for i := 0; i < testWatchers; i++ { for i := 0; i < testWatchers; i++ {
w, err := m.Watch()
if err != nil {
t.Fatalf("Unable start event watcher: '%v' (will not retry!)", err)
}
// Verify that each watcher gets the events in the correct order // Verify that each watcher gets the events in the correct order
go func(watcher int, w Interface) { go func(watcher int, w Interface) {
tableLine := 0 tableLine := 0
@ -75,7 +80,7 @@ func TestBroadcaster(t *testing.T) {
tableLine++ tableLine++
} }
wg.Done() wg.Done()
}(i, m.Watch()) }(i, w)
} }
for i, item := range table { for i, item := range table {
@ -90,8 +95,14 @@ func TestBroadcaster(t *testing.T) {
func TestBroadcasterWatcherClose(t *testing.T) { func TestBroadcasterWatcherClose(t *testing.T) {
m := NewBroadcaster(0, WaitIfChannelFull) m := NewBroadcaster(0, WaitIfChannelFull)
w := m.Watch() w, err := m.Watch()
w2 := m.Watch() if err != nil {
t.Fatalf("Unable start event watcher: '%v' (will not retry!)", err)
}
w2, err := m.Watch()
if err != nil {
t.Fatalf("Unable start event watcher: '%v' (will not retry!)", err)
}
w.Stop() w.Stop()
m.Shutdown() m.Shutdown()
if _, open := <-w.ResultChan(); open { if _, open := <-w.ResultChan(); open {
@ -108,6 +119,14 @@ func TestBroadcasterWatcherClose(t *testing.T) {
func TestBroadcasterWatcherStopDeadlock(t *testing.T) { func TestBroadcasterWatcherStopDeadlock(t *testing.T) {
done := make(chan bool) done := make(chan bool)
m := NewBroadcaster(0, WaitIfChannelFull) m := NewBroadcaster(0, WaitIfChannelFull)
w, err := m.Watch()
if err != nil {
t.Fatalf("Unable start event watcher: '%v' (will not retry!)", err)
}
w2, err := m.Watch()
if err != nil {
t.Fatalf("Unable start event watcher: '%v' (will not retry!)", err)
}
go func(w0, w1 Interface) { go func(w0, w1 Interface) {
// We know Broadcaster is in the distribute loop once one watcher receives // We know Broadcaster is in the distribute loop once one watcher receives
// an event. Stop the other watcher while distribute is trying to // an event. Stop the other watcher while distribute is trying to
@ -119,7 +138,7 @@ func TestBroadcasterWatcherStopDeadlock(t *testing.T) {
w0.Stop() w0.Stop()
} }
close(done) close(done)
}(m.Watch(), m.Watch()) }(w, w2)
m.Action(Added, &myType{}) m.Action(Added, &myType{})
select { select {
case <-time.After(wait.ForeverTestTimeout): case <-time.After(wait.ForeverTestTimeout):
@ -137,8 +156,12 @@ func TestBroadcasterDropIfChannelFull(t *testing.T) {
// Add a couple watchers // Add a couple watchers
watches := make([]Interface, 2) watches := make([]Interface, 2)
var err error
for i := range watches { for i := range watches {
watches[i] = m.Watch() watches[i], err = m.Watch()
if err != nil {
t.Fatalf("Unable start event watcher: '%v' (will not retry!)", err)
}
} }
// Send a couple events before closing the broadcast channel. // Send a couple events before closing the broadcast channel.
@ -194,33 +217,32 @@ func TestBroadcasterWatchAfterShutdown(t *testing.T) {
m := NewBroadcaster(0, WaitIfChannelFull) m := NewBroadcaster(0, WaitIfChannelFull)
m.Shutdown() m.Shutdown()
watch := func() { _, err := m.Watch()
defer func() { assert.EqualError(t, err, "broadcaster already stopped", "Watch should report error id broadcaster is shutdown")
if err := recover(); err == nil {
t.Error("should cause panic")
}
}()
m.Watch()
}
watch()
watchWithPrefix := func() { _, err = m.WatchWithPrefix([]Event{event1, event2})
defer func() { assert.EqualError(t, err, "broadcaster already stopped", "WatchWithPrefix should report error id broadcaster is shutdown")
if err := recover(); err == nil { }
t.Error("should cause panic")
} func TestBroadcasterSendEventAfterShutdown(t *testing.T) {
}() m := NewBroadcaster(1, DropIfChannelFull)
m.WatchWithPrefix([]Event{event1, event2})
} event := Event{Type: Added, Object: &myType{"foo", "hello world"}}
watchWithPrefix()
// Add a couple watchers
action := func() { watches := make([]Interface, 2)
defer func() { for i := range watches {
if err := recover(); err == nil { watches[i], _ = m.Watch()
t.Error("should cause panic") }
} m.Shutdown()
}()
m.Action(event1.Type, event1.Object) // Send a couple events after closing the broadcast channel.
} t.Log("Sending event")
action()
err := m.Action(event.Type, event.Object)
assert.EqualError(t, err, "broadcaster already stopped", "ActionOrDrop should report error id broadcaster is shutdown")
sendOnClosed, err := m.ActionOrDrop(event.Type, event.Object)
assert.Equal(t, sendOnClosed, false, "ActionOrDrop should return false if broadcaster is already shutdown")
assert.EqualError(t, err, "broadcaster already stopped", "ActionOrDrop should report error id broadcaster is shutdown")
} }

View File

@ -275,11 +275,11 @@ func (f *FakeControllerSource) Watch(options metav1.ListOptions) (watch.Interfac
// clients). // clients).
changes = append(changes, watch.Event{Type: c.Type, Object: c.Object.DeepCopyObject()}) changes = append(changes, watch.Event{Type: c.Type, Object: c.Object.DeepCopyObject()})
} }
return f.Broadcaster.WatchWithPrefix(changes), nil return f.Broadcaster.WatchWithPrefix(changes)
} else if rc > f.lastRV { } else if rc > f.lastRV {
return nil, errors.New("resource version in the future not supported by this fake") return nil, errors.New("resource version in the future not supported by this fake")
} }
return f.Broadcaster.Watch(), nil return f.Broadcaster.Watch()
} }
// Shutdown closes the underlying broadcaster, waiting for events to be // Shutdown closes the underlying broadcaster, waiting for events to be

View File

@ -307,7 +307,15 @@ func (e *eventBroadcasterImpl) StartStructuredLogging(verbosity klog.Level) func
// StartEventWatcher starts sending events received from this EventBroadcaster to the given event handler function. // StartEventWatcher starts sending events received from this EventBroadcaster to the given event handler function.
// The return value is used to stop recording // The return value is used to stop recording
func (e *eventBroadcasterImpl) StartEventWatcher(eventHandler func(event runtime.Object)) func() { func (e *eventBroadcasterImpl) StartEventWatcher(eventHandler func(event runtime.Object)) func() {
watcher := e.Watch() watcher, err := e.Watch()
if err != nil {
klog.Errorf("Unable start event watcher: '%v' (will not retry!)", err)
// TODO: Rewrite the function signature to return an error, for
// now just return a no-op function
return func() {
klog.Error("The event watcher failed to start")
}
}
go func() { go func() {
defer utilruntime.HandleCrash() defer utilruntime.HandleCrash()
for { for {

View File

@ -298,7 +298,10 @@ func (e *eventBroadcasterImpl) StartStructuredLogging(verbosity klog.Level) watc
// StartEventWatcher starts sending events received from this EventBroadcaster to the given event handler function. // StartEventWatcher starts sending events received from this EventBroadcaster to the given event handler function.
// The return value can be ignored or used to stop recording, if desired. // The return value can be ignored or used to stop recording, if desired.
func (e *eventBroadcasterImpl) StartEventWatcher(eventHandler func(*v1.Event)) watch.Interface { func (e *eventBroadcasterImpl) StartEventWatcher(eventHandler func(*v1.Event)) watch.Interface {
watcher := e.Watch() watcher, err := e.Watch()
if err != nil {
klog.Errorf("Unable start event watcher: '%v' (will not retry!)", err)
}
go func() { go func() {
defer utilruntime.HandleCrash() defer utilruntime.HandleCrash()
for watchEvent := range watcher.ResultChan() { for watchEvent := range watcher.ResultChan() {
@ -346,7 +349,12 @@ func (recorder *recorderImpl) generateEvent(object runtime.Object, annotations m
// when we go to shut down this broadcaster. Just drop events if we get overloaded, // when we go to shut down this broadcaster. Just drop events if we get overloaded,
// and log an error if that happens (we've configured the broadcaster to drop // and log an error if that happens (we've configured the broadcaster to drop
// outgoing events anyway). // outgoing events anyway).
if sent := recorder.ActionOrDrop(watch.Added, event); !sent { sent, err := recorder.ActionOrDrop(watch.Added, event)
if err != nil {
klog.Errorf("unable to record event: %v (will not retry!)", err)
return
}
if !sent {
klog.Errorf("unable to record event: too many queued events, dropped event %#v", event) klog.Errorf("unable to record event: too many queued events, dropped event %#v", event)
} }
} }