Implement batching audit webhook graceful shutdown
parent 4d6db7466c
commit 7798d32fc7
@@ -36,6 +36,7 @@ type Backend interface {
     Run(stopCh <-chan struct{}) error
 
     // Shutdown will synchronously shut down the backend while making sure that all pending
-    // events are delivered.
+    // events are delivered. It can be assumed that this method is called after
+    // the stopCh channel passed to the Run method has been closed.
     Shutdown()
 }
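Note: the new contract above implies an ordering for callers: close the stop channel first, then call Shutdown and let it block until pending deliveries finish. A minimal sketch of that ordering follows; the noopBackend and the serve flow are illustrative stand-ins, not part of this commit.

package main

import "fmt"

// Backend mirrors the two lifecycle methods of the audit Backend interface
// shown above (trimmed for this sketch).
type Backend interface {
    Run(stopCh <-chan struct{}) error
    Shutdown()
}

// noopBackend is a stand-in used only to make the sketch runnable.
type noopBackend struct{ done chan struct{} }

func (n *noopBackend) Run(stopCh <-chan struct{}) error {
    go func() {
        <-stopCh      // stop accepting new work
        close(n.done) // pretend all pending events have been delivered
    }()
    return nil
}

func (n *noopBackend) Shutdown() { <-n.done }

func main() {
    var b Backend = &noopBackend{done: make(chan struct{})}
    stopCh := make(chan struct{})
    if err := b.Run(stopCh); err != nil {
        fmt.Println("run error:", err)
        return
    }
    // ... the server does its work ...
    close(stopCh) // 1. close stopCh first,
    b.Shutdown()  // 2. then Shutdown blocks until delivery has finished.
    fmt.Println("backend shut down cleanly")
}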
@@ -36,8 +36,8 @@ func (f *fakeBackend) Run(stopCh <-chan struct{}) error {
     return nil
 }
 
-func (u *fakeBackend) Shutdown() {
-    // nothing to do here
+func (f *fakeBackend) Shutdown() {
+    // Nothing to do here.
 }
 
 func TestUnion(t *testing.T) {
@@ -86,6 +86,6 @@ func (b *backend) Run(stopCh <-chan struct{}) error {
     return nil
 }
 
-func (u *backend) Shutdown() {
-    // nothing to do here
+func (b *backend) Shutdown() {
+    // Nothing to do here.
 }
@@ -18,8 +18,10 @@ limitations under the License.
 package webhook
 
 import (
+    "errors"
     "fmt"
     "strings"
+    "sync"
     "time"
 
     "k8s.io/apimachinery/pkg/apimachinery/announced"
@@ -161,6 +163,7 @@ func newBatchWebhook(configFile string) (*batchBackend, error) {
         maxBatchSize: defaultBatchMaxSize,
         maxBatchWait: defaultBatchMaxWait,
         cloner:       c,
+        shutdownCh:   make(chan struct{}),
     }, nil
 }
 
@@ -179,28 +182,42 @@ type batchBackend struct {
     // Receiving maxBatchSize events will always trigger a send, regardless of
     // if this amount of time has been reached.
     maxBatchWait time.Duration
+
+    // Channel to signal that the sending routine has stopped and therefore
+    // it's safe to assume that no new requests will be initiated.
+    shutdownCh chan struct{}
+
+    // The sending routine locks reqMutex for reading before initiating a new
+    // goroutine to send a request. This goroutine then unlocks reqMutex for
+    // reading when completed. The Shutdown method locks reqMutex for writing
+    // after the sending routine has exited. When reqMutex is locked for writing,
+    // all requests have been completed and no new will be spawned, since the
+    // sending routine is not running anymore.
+    reqMutex sync.RWMutex
 }
 
 func (b *batchBackend) Run(stopCh <-chan struct{}) error {
-    f := func() {
-        // Recover from any panics caused by this method so a panic in the
-        // goroutine can't bring down the main routine.
-        defer runtime.HandleCrash()
-
-        t := time.NewTimer(b.maxBatchWait)
-        defer t.Stop() // Release ticker resources
-
-        b.sendBatchEvents(stopCh, t.C)
-    }
-
     go func() {
-        for {
-            f()
-
-            select {
-            case <-stopCh:
-                return
-            default:
-            }
+        // Signal that the sending routine has exited.
+        defer close(b.shutdownCh)
+
+        b.runSendingRoutine(stopCh)
+
+        // Handle the events that were received after the last buffer
+        // scraping and before this line. Since the buffer is closed, no new
+        // events will come through.
+        for {
+            if last := func() bool {
+                // Recover from any panic in order to try to send all remaining events.
+                // Note, that in case of a panic, the return value will be false and
+                // the loop execution will continue.
+                defer runtime.HandleCrash()
+
+                events := b.collectLastEvents()
+                b.sendBatchEvents(events)
+                return len(events) == 0
+            }(); last {
+                break
+            }
         }
     }()
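Note: the reqMutex comment above describes a small read/write-lock protocol for waiting out in-flight requests: every request holds a read lock for its whole lifetime, and shutdown takes the write lock only after no new requests can start, so the Lock call returns once all readers are gone. A standalone sketch of that protocol, assuming hypothetical dispatcher, dispatch, and waitForInflight names rather than the webhook code itself:

package main

import (
    "fmt"
    "sync"
    "time"
)

// dispatcher sketches the reqMutex protocol: each in-flight request holds a
// read lock, and shutdown takes the write lock once no new requests can start,
// which blocks until every reader has released its lock.
type dispatcher struct {
    reqMutex sync.RWMutex
}

// dispatch starts one request; the read lock is held for its whole lifetime.
func (d *dispatcher) dispatch(id int) {
    d.reqMutex.RLock()
    go func() {
        defer d.reqMutex.RUnlock()
        time.Sleep(100 * time.Millisecond) // simulate a webhook POST
        fmt.Println("request", id, "done")
    }()
}

// waitForInflight is the Shutdown half: once the caller guarantees that no
// new dispatch calls will happen, Lock() returns only after every RUnlock.
func (d *dispatcher) waitForInflight() {
    d.reqMutex.Lock()
    d.reqMutex.Unlock()
}

func main() {
    d := &dispatcher{}
    for i := 0; i < 3; i++ {
        d.dispatch(i)
    }
    // In the real backend this point is reached only after the sending
    // routine has exited, so no new read locks can be taken.
    d.waitForInflight()
    fmt.Println("all in-flight requests finished")
}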
@@ -208,25 +225,62 @@ func (b *batchBackend) Run(stopCh <-chan struct{}) error {
 }
 
 func (b *batchBackend) Shutdown() {
-    // TODO: send out batched events
+    <-b.shutdownCh
+
+    // Write locking reqMutex will guarantee that all requests will be completed
+    // by the time the goroutine continues the execution. Since this line is
+    // executed after shutdownCh was closed, no new requests will follow this
+    // lock, because read lock is called in the same goroutine that closes
+    // shutdownCh before exiting.
+    b.reqMutex.Lock()
+    b.reqMutex.Unlock()
 }
 
-// sendBatchEvents attempts to batch some number of events to the backend. It POSTs events
-// in a goroutine and logging any error encountered during the POST.
+// runSendingRoutine runs a loop that collects events from the buffer. When
+// stopCh is closed, runSendingRoutine stops and closes the buffer.
+func (b *batchBackend) runSendingRoutine(stopCh <-chan struct{}) {
+    defer close(b.buffer)
+
+    for {
+        func() {
+            // Recover from any panics caused by this function so a panic in the
+            // goroutine can't bring down the main routine.
+            defer runtime.HandleCrash()
+
+            t := time.NewTimer(b.maxBatchWait)
+            defer t.Stop() // Release ticker resources
+
+            b.sendBatchEvents(b.collectEvents(stopCh, t.C))
+        }()
+
+        select {
+        case <-stopCh:
+            return
+        default:
+        }
+    }
+}
+
+// collectEvents attempts to collect some number of events in a batch.
 //
-// The following things can cause sendBatchEvents to exit:
+// The following things can cause collectEvents to stop and return the list
+// of events:
 //
 // * Some maximum number of events are received.
 // * Timer has passed, all queued events are sent.
 // * StopCh is closed, all queued events are sent.
 //
-func (b *batchBackend) sendBatchEvents(stopCh <-chan struct{}, timer <-chan time.Time) {
+func (b *batchBackend) collectEvents(stopCh <-chan struct{}, timer <-chan time.Time) []auditinternal.Event {
     var events []auditinternal.Event
 
 L:
     for i := 0; i < b.maxBatchSize; i++ {
         select {
-        case ev := <-b.buffer:
+        case ev, ok := <-b.buffer:
+            // Buffer channel was closed and no new events will follow.
+            if !ok {
+                break L
+            }
             events = append(events, *ev)
         case <-timer:
             // Timer has expired. Send whatever events are in the queue.
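Note: collectEvents above bounds a batch three ways: by size, by the timer, and by the buffer being closed (the comma-ok receive). A self-contained sketch of the same select loop, using a plain int channel and placeholder names (collect, buffer, stop) instead of audit events:

package main

import (
    "fmt"
    "time"
)

// collect reads up to max items from buffer and returns early when the timer
// fires, stop is closed, or the buffer itself is closed (comma-ok is false).
func collect(buffer <-chan int, stop <-chan struct{}, timer <-chan time.Time, max int) []int {
    var batch []int
L:
    for i := 0; i < max; i++ {
        select {
        case v, ok := <-buffer:
            if !ok { // buffer closed, no new events will follow
                break L
            }
            batch = append(batch, v)
        case <-timer:
            break L // deadline reached, send what we have
        case <-stop:
            break L // shutting down, send what we have
        }
    }
    return batch
}

func main() {
    buffer := make(chan int, 10)
    for i := 0; i < 4; i++ {
        buffer <- i
    }
    t := time.NewTimer(50 * time.Millisecond)
    defer t.Stop()
    stop := make(chan struct{})

    fmt.Println("batch:", collect(buffer, stop, t.C, 8)) // [0 1 2 3], then the timer fires
}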
@@ -237,15 +291,43 @@ L:
         }
     }
 
+    return events
+}
+
+// collectLastEvents assumes that the buffer was closed. It collects the first
+// maxBatchSize events from the closed buffer into a batch and returns them.
+func (b *batchBackend) collectLastEvents() []auditinternal.Event {
+    var events []auditinternal.Event
+
+    for i := 0; i < b.maxBatchSize; i++ {
+        ev, ok := <-b.buffer
+        if !ok {
+            break
+        }
+        events = append(events, *ev)
+    }
+
+    return events
+}
+
+// sendBatchEvents sends a POST requests with the event list in a goroutine
+// and logs any error encountered.
+func (b *batchBackend) sendBatchEvents(events []auditinternal.Event) {
     if len(events) == 0 {
         return
     }
 
     list := auditinternal.EventList{Items: events}
+
+    // Locking reqMutex for read will guarantee that the shutdown process will
+    // block until the goroutine started below is finished. At the same time, it
+    // will not prevent other batches from being proceed further this point.
+    b.reqMutex.RLock()
     go func() {
         // Execute the webhook POST in a goroutine to keep it from blocking.
         // This lets the webhook continue to drain the queue immediatly.
 
+        defer b.reqMutex.RUnlock()
         defer runtime.HandleCrash()
 
         err := webhook.WithExponentialBackoff(0, func() error {
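Note: collectLastEvents relies on the guarantee that a closed channel can still be drained of its buffered values until the comma-ok receive reports false. A tiny sketch of that drain, with hypothetical names:

package main

import "fmt"

// drain keeps receiving after the producer closes the channel: buffered values
// are still returned until ok == false, so a consumer can flush whatever was
// queued at shutdown time, up to a batch limit.
func drain(buffer <-chan string, max int) []string {
    var out []string
    for i := 0; i < max; i++ {
        v, ok := <-buffer
        if !ok { // channel closed and empty
            break
        }
        out = append(out, v)
    }
    return out
}

func main() {
    buffer := make(chan string, 4)
    buffer <- "a"
    buffer <- "b"
    close(buffer) // closing does not discard the two buffered values

    fmt.Println(drain(buffer, 10)) // [a b]
}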
@@ -268,10 +350,27 @@ func (b *batchBackend) ProcessEvents(ev ...*auditinternal.Event) {
         // sent to the Sink. Deep copy and send the copy to the queue.
         event := e.DeepCopy()
 
-        select {
-        case b.buffer <- event:
-        default:
-            audit.HandlePluginError(pluginName, fmt.Errorf("audit webhook queue blocked"), ev[i:]...)
+        // The following mechanism is in place to support the situation when audit
+        // events are still coming after the backend was shut down.
+        var sendErr error
+        func() {
+            // If the backend was shut down and the buffer channel was closed, an
+            // attempt to add an event to it will result in panic that we should
+            // recover from.
+            defer func() {
+                if err := recover(); err != nil {
+                    sendErr = errors.New("audit webhook shut down")
+                }
+            }()
+
+            select {
+            case b.buffer <- event:
+            default:
+                sendErr = errors.New("audit webhook queue blocked")
+            }
+        }()
+        if sendErr != nil {
+            audit.HandlePluginError(pluginName, sendErr, ev[i:]...)
             return
         }
     }
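Note: ProcessEvents now treats both a full queue and a closed queue as send errors. Sending on a closed channel panics, and the deferred recover converts that panic into an ordinary error instead of crashing the caller. A minimal sketch of the same guard; trySend is a hypothetical stand-in, not the webhook code:

package main

import (
    "errors"
    "fmt"
)

// trySend performs a non-blocking send wrapped in a recover, so that both a
// full queue and a queue that was already closed surface as ordinary errors
// instead of a dropped event or a crash.
func trySend(buffer chan int, v int) (err error) {
    defer func() {
        if r := recover(); r != nil {
            err = errors.New("queue shut down") // send on closed channel panicked
        }
    }()
    select {
    case buffer <- v:
        return nil
    default:
        return errors.New("queue blocked")
    }
}

func main() {
    buffer := make(chan int, 1)
    fmt.Println(trySend(buffer, 1)) // <nil>
    fmt.Println(trySend(buffer, 2)) // queue blocked (buffer full)
    close(buffer)
    fmt.Println(trySend(buffer, 3)) // queue shut down (recovered panic)
}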
@@ -163,7 +163,7 @@ func TestBatchWebhookMaxEvents(t *testing.T) {
     stopCh := make(chan struct{})
     timer := make(chan time.Time, 1)
 
-    backend.sendBatchEvents(stopCh, timer)
+    backend.sendBatchEvents(backend.collectEvents(stopCh, timer))
     require.Equal(t, defaultBatchMaxSize, <-got, "did not get batch max size")
 
     go func() {
@@ -171,7 +171,7 @@ func TestBatchWebhookMaxEvents(t *testing.T) {
         timer <- time.Now() // Trigger the wait timeout
     }()
 
-    backend.sendBatchEvents(stopCh, timer)
+    backend.sendBatchEvents(backend.collectEvents(stopCh, timer))
     require.Equal(t, nRest, <-got, "failed to get the rest of the events")
 }
 
@@ -198,10 +198,78 @@ func TestBatchWebhookStopCh(t *testing.T) {
         waitForEmptyBuffer(backend)
         close(stopCh) // stop channel has stopped
     }()
-    backend.sendBatchEvents(stopCh, timer)
+    backend.sendBatchEvents(backend.collectEvents(stopCh, timer))
     require.Equal(t, expected, <-got, "get queued events after timer expires")
 }
 
+func TestBatchWebhookProcessEventsAfterStop(t *testing.T) {
+    events := make([]*auditinternal.Event, 1) // less than max size.
+    for i := range events {
+        events[i] = &auditinternal.Event{}
+    }
+
+    got := make(chan struct{})
+    s := httptest.NewServer(newWebhookHandler(t, func(events *auditv1alpha1.EventList) {
+        close(got)
+    }))
+    defer s.Close()
+
+    backend := newTestBatchWebhook(t, s.URL)
+    stopCh := make(chan struct{})
+
+    backend.Run(stopCh)
+    close(stopCh)
+    <-backend.shutdownCh
+    backend.ProcessEvents(events...)
+    assert.Equal(t, 0, len(backend.buffer), "processed events after the backed has been stopped")
+}
+
+func TestBatchWebhookShutdown(t *testing.T) {
+    events := make([]*auditinternal.Event, 1)
+    for i := range events {
+        events[i] = &auditinternal.Event{}
+    }
+
+    got := make(chan struct{})
+    contReqCh := make(chan struct{})
+    shutdownCh := make(chan struct{})
+    s := httptest.NewServer(newWebhookHandler(t, func(events *auditv1alpha1.EventList) {
+        close(got)
+        <-contReqCh
+    }))
+    defer s.Close()
+
+    backend := newTestBatchWebhook(t, s.URL)
+    backend.ProcessEvents(events...)
+
+    go func() {
+        // Assume stopCh was closed.
+        close(backend.buffer)
+        backend.sendBatchEvents(backend.collectLastEvents())
+    }()
+
+    <-got
+
+    go func() {
+        close(backend.shutdownCh)
+        backend.Shutdown()
+        close(shutdownCh)
+    }()
+
+    // Wait for some time in case there's a bug that allows for the Shutdown
+    // method to exit before all requests has been completed.
+    time.Sleep(1 * time.Second)
+    select {
+    case <-shutdownCh:
+        t.Fatal("Backend shut down before all requests finished")
+    default:
+        // Continue.
+    }
+
+    close(contReqCh)
+    <-shutdownCh
+}
+
 func TestBatchWebhookEmptyBuffer(t *testing.T) {
     events := make([]*auditinternal.Event, 1) // less than max size.
     for i := range events {
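Note: TestBatchWebhookShutdown above checks that Shutdown stays blocked while a request is still in flight by parking that request behind a blocking handler and then, after a grace period, asserting that the shutdown goroutine has not finished. A condensed sketch of that synchronization idea, with hypothetical names (requestGate, shutdownDone) and without the HTTP server:

package main

import (
    "fmt"
    "sync"
    "time"
)

func main() {
    var reqMutex sync.RWMutex
    requestGate := make(chan struct{}) // closed to let the in-flight request finish
    shutdownDone := make(chan struct{})

    // In-flight request: holds the read lock until the gate is released.
    reqMutex.RLock()
    go func() {
        defer reqMutex.RUnlock()
        <-requestGate
    }()

    // Shutdown under test: must block until the request above releases the lock.
    go func() {
        reqMutex.Lock()
        reqMutex.Unlock()
        close(shutdownDone)
    }()

    // Grace period, then assert shutdown is still blocked.
    time.Sleep(100 * time.Millisecond)
    select {
    case <-shutdownDone:
        fmt.Println("BUG: shutdown finished before the in-flight request")
    default:
        fmt.Println("ok: shutdown is still waiting")
    }

    close(requestGate) // let the request finish
    <-shutdownDone
    fmt.Println("shutdown completed after the request")
}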
@@ -223,7 +291,7 @@ func TestBatchWebhookEmptyBuffer(t *testing.T) {
     timer <- time.Now() // Timer is done.
 
     // Buffer is empty, no events have been queued. This should exit but send no events.
-    backend.sendBatchEvents(stopCh, timer)
+    backend.sendBatchEvents(backend.collectEvents(stopCh, timer))
 
     // Send additional events after the sendBatchEvents has been called.
     backend.ProcessEvents(events...)
@@ -232,7 +300,7 @@ func TestBatchWebhookEmptyBuffer(t *testing.T) {
         timer <- time.Now()
     }()
 
-    backend.sendBatchEvents(stopCh, timer)
+    backend.sendBatchEvents(backend.collectEvents(stopCh, timer))
 
     // Make sure we didn't get a POST with zero events.
     require.Equal(t, expected, <-got, "expected one event")