k8s.io/apimachinery: add HandleCrashWithLogger and HandleErrorWithLogger

There are situations where it makes more sense to pass a logger through a
call chain, primarily because passing a context would imply that the call chain
should honor cancellation even though there is a different shutdown mechanism.

Using the *WithContext variants would cause additional overhead for
klog.NewContext, which hurts in particular for HandleCrash because that
function is typically a nop that doesn't actually need to log
anything. HandleCrashWithLogger avoids that overhead.

For HandleError that is less relevant because it always logs, but for the sake
of symmetry it also gets added.

Putting klog.Logger (= logr.Logger) into the public Kubernetes Go API is okay
because it's no longer realistic that these packages can ever drop the klog
dependency. Callers using slog as logger in their binary can use
https://github.com/veqryn/slog-context to store a slog.Logger in a context and
then call the *WithContext variants, klog.FromContext will be able to use it.
This is probably very rare, so there's no need for *WithSlog variants.

While at it, unit testing gets enhanced and logging in panic handlers gets
improved such that they are guaranteed to get a saner location when not
doing any caller skipping. Previously, this was undefined.
This commit is contained in:
Patrick Ohly 2024-11-28 15:19:58 +01:00
parent 95d71c464a
commit e3c584030c
2 changed files with 164 additions and 9 deletions

View File

@ -36,6 +36,11 @@ var (
) )
// PanicHandlers is a list of functions which will be invoked when a panic happens. // PanicHandlers is a list of functions which will be invoked when a panic happens.
//
// The code invoking these handlers prepares a contextual logger so that
// klog.FromContext(ctx) already skips over the panic handler itself and
// several other intermediate functions, ideally such that the log output
// is attributed to the code which triggered the panic.
var PanicHandlers = []func(context.Context, interface{}){logPanic} var PanicHandlers = []func(context.Context, interface{}){logPanic}
// HandleCrash simply catches a crash and logs an error. Meant to be called via // HandleCrash simply catches a crash and logs an error. Meant to be called via
@ -45,7 +50,7 @@ var PanicHandlers = []func(context.Context, interface{}){logPanic}
// //
// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully. // E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
// //
// Contextual logging: HandleCrashWithContext should be used instead of HandleCrash in code which supports contextual logging. // Contextual logging: HandleCrashWithContext or HandleCrashWithLogger should be used instead of HandleCrash in code which supports contextual logging.
func HandleCrash(additionalHandlers ...func(interface{})) { func HandleCrash(additionalHandlers ...func(interface{})) {
if r := recover(); r != nil { if r := recover(); r != nil {
additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers)) additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers))
@ -74,10 +79,30 @@ func HandleCrashWithContext(ctx context.Context, additionalHandlers ...func(cont
} }
} }
// handleCrash is the common implementation of HandleCrash and HandleCrash. // HandleCrashWithLogger simply catches a crash and logs an error. Meant to be called via
// defer. Additional context-specific handlers can be provided, and will be
// called in case of panic. HandleCrash actually crashes, after calling the
// handlers and logging the panic message.
//
// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
func HandleCrashWithLogger(logger klog.Logger, additionalHandlers ...func(context.Context, interface{})) {
if r := recover(); r != nil {
ctx := klog.NewContext(context.Background(), logger)
handleCrash(ctx, r, additionalHandlers...)
}
}
// handleCrash is the common implementation of the HandleCrash* variants.
// Having those call a common implementation ensures that the stack depth // Having those call a common implementation ensures that the stack depth
// is the same regardless through which path the handlers get invoked. // is the same regardless through which path the handlers get invoked.
func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) { func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) {
// We don't really know how many call frames to skip because the Go
// panic handler is between us and the code where the panic occurred.
// If it's one function (as in Go 1.21), then skipping four levels
// gets us to the function which called the `defer HandleCrashWithontext(...)`.
logger := klog.FromContext(ctx).WithCallDepth(4)
ctx = klog.NewContext(ctx, logger)
for _, fn := range PanicHandlers { for _, fn := range PanicHandlers {
fn(ctx, r) fn(ctx, r)
} }
@ -106,11 +131,7 @@ func logPanic(ctx context.Context, r interface{}) {
stacktrace := make([]byte, size) stacktrace := make([]byte, size)
stacktrace = stacktrace[:runtime.Stack(stacktrace, false)] stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]
// We don't really know how many call frames to skip because the Go logger := klog.FromContext(ctx)
// panic handler is between us and the code where the panic occurred.
// If it's one function (as in Go 1.21), then skipping four levels
// gets us to the function which called the `defer HandleCrashWithontext(...)`.
logger := klog.FromContext(ctx).WithCallDepth(4)
// For backwards compatibility, conversion to string // For backwards compatibility, conversion to string
// is handled here instead of defering to the logging // is handled here instead of defering to the logging
@ -176,12 +197,19 @@ func HandleError(err error) {
// and key/value pairs. // and key/value pairs.
// //
// This variant should be used instead of HandleError because it supports // This variant should be used instead of HandleError because it supports
// structured, contextual logging. // structured, contextual logging. Alternatively, [HandleErrorWithLogger] can
// be used if a logger is available instead of a context.
func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
handleError(ctx, err, msg, keysAndValues...) handleError(ctx, err, msg, keysAndValues...)
} }
// handleError is the common implementation of HandleError and HandleErrorWithContext. // HandleErrorWithLogger is an alternative to [HandlerErrorWithContext] which accepts
// a logger for contextual logging.
func HandleErrorWithLogger(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
handleError(klog.NewContext(context.Background(), logger), err, msg, keysAndValues...)
}
// handleError is the common implementation of the HandleError* variants.
// Using this common implementation ensures that the stack depth // Using this common implementation ensures that the stack depth
// is the same regardless through which path the handlers get invoked. // is the same regardless through which path the handlers get invoked.
func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {

View File

@ -19,15 +19,22 @@ package runtime
import ( import (
"bytes" "bytes"
"context" "context"
"errors"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"os" "os"
"regexp" "regexp"
"runtime"
"strings" "strings"
"sync" "sync"
"testing" "testing"
"time" "time"
"github.com/stretchr/testify/assert"
"k8s.io/klog/v2"
"k8s.io/klog/v2/textlogger"
) )
func TestHandleCrash(t *testing.T) { func TestHandleCrash(t *testing.T) {
@ -36,6 +43,7 @@ func TestHandleCrash(t *testing.T) {
t.Errorf("Expected a panic to recover from") t.Errorf("Expected a panic to recover from")
} }
}() }()
//nolint:logcheck // Intentionally uses the old API.
defer HandleCrash() defer HandleCrash()
panic("Test Panic") panic("Test Panic")
} }
@ -55,6 +63,7 @@ func TestCustomHandleCrash(t *testing.T) {
t.Errorf("Expected a panic to recover from") t.Errorf("Expected a panic to recover from")
} }
}() }()
//nolint:logcheck // Intentionally uses the old API.
defer HandleCrash() defer HandleCrash()
panic("test") panic("test")
}() }()
@ -73,6 +82,7 @@ func TestCustomHandleError(t *testing.T) {
}, },
} }
err := fmt.Errorf("test") err := fmt.Errorf("test")
//nolint:logcheck // Intentionally uses the old API.
HandleError(err) HandleError(err)
if result != err { if result != err {
t.Errorf("did not receive custom handler") t.Errorf("did not receive custom handler")
@ -86,6 +96,7 @@ func TestHandleCrashLog(t *testing.T) {
t.Fatalf("expected a panic to recover from") t.Fatalf("expected a panic to recover from")
} }
}() }()
//nolint:logcheck // Intentionally uses the old API.
defer HandleCrash() defer HandleCrash()
panic("test panic") panic("test panic")
}) })
@ -119,6 +130,72 @@ func TestHandleCrashLog(t *testing.T) {
} }
} }
func TestHandleCrashContextual(t *testing.T) {
for name, handleCrash := range map[string]func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})){
"WithLogger": func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})) {
logger = logger.WithCallDepth(2) // This function *and* the trigger helper.
defer HandleCrashWithLogger(logger, additionalHandlers...)
trigger()
},
"WithContext": func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})) {
logger = logger.WithCallDepth(2)
defer HandleCrashWithContext(klog.NewContext(context.Background(), logger), additionalHandlers...)
trigger()
},
} {
t.Run(name, func(t *testing.T) {
for name, tt := range map[string]struct {
trigger func()
expectPanic string
}{
"no-panic": {
trigger: func() {},
expectPanic: "",
},
"string-panic": {
trigger: func() { panic("fake") },
expectPanic: "fake",
},
"int-panic": {
trigger: func() { panic(42) },
expectPanic: "42",
},
} {
t.Run(name, func(t *testing.T) {
var buffer bytes.Buffer
timeInUTC := time.Date(2009, 12, 1, 13, 30, 40, 42000, time.UTC)
timeString := "1201 13:30:40.000042"
logger := textlogger.NewLogger(textlogger.NewConfig(
textlogger.FixedTime(timeInUTC),
textlogger.Output(&buffer),
))
ReallyCrash = false
defer func() { ReallyCrash = true }()
handler := func(ctx context.Context, r interface{}) {
// Same formatting as in HandleCrash.
str, ok := r.(string)
if !ok {
str = fmt.Sprintf("%v", r)
}
klog.FromContext(ctx).Info("handler called", "panic", str)
}
_, _, line, _ := runtime.Caller(0)
handleCrash(logger, tt.trigger, handler)
if tt.expectPanic != "" {
assert.Contains(t, buffer.String(), fmt.Sprintf(`E%s %7d runtime_test.go:%d] "Observed a panic" panic=%q`, timeString, os.Getpid(), line+1, tt.expectPanic))
assert.Contains(t, buffer.String(), fmt.Sprintf(`I%s %7d runtime_test.go:%d] "handler called" panic=%q
`, timeString, os.Getpid(), line+1, tt.expectPanic))
} else {
assert.Empty(t, buffer.String())
}
})
}
})
}
}
func TestHandleCrashLogSilenceHTTPErrAbortHandler(t *testing.T) { func TestHandleCrashLogSilenceHTTPErrAbortHandler(t *testing.T) {
log, err := captureStderr(func() { log, err := captureStderr(func() {
defer func() { defer func() {
@ -126,6 +203,7 @@ func TestHandleCrashLogSilenceHTTPErrAbortHandler(t *testing.T) {
t.Fatalf("expected to recover from http.ErrAbortHandler") t.Fatalf("expected to recover from http.ErrAbortHandler")
} }
}() }()
//nolint:logcheck // Intentionally uses the old API.
defer HandleCrash() defer HandleCrash()
panic(http.ErrAbortHandler) panic(http.ErrAbortHandler)
}) })
@ -184,3 +262,52 @@ func Test_rudimentaryErrorBackoff_OnError_ParallelSleep(t *testing.T) {
t.Errorf("OnError slept for too long: %s", since) t.Errorf("OnError slept for too long: %s", since)
} }
} }
func TestHandleError(t *testing.T) {
for name, handleError := range map[string]func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}){
"WithLogger": func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
helper, logger := logger.WithCallStackHelper()
helper()
HandleErrorWithLogger(logger, err, msg, keysAndValues...)
},
"WithContext": func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
helper, logger := logger.WithCallStackHelper()
helper()
HandleErrorWithContext(klog.NewContext(context.Background(), logger), err, msg, keysAndValues...)
},
} {
t.Run(name, func(t *testing.T) {
for name, tc := range map[string]struct {
err error
msg string
keysAndValues []interface{}
expectLog string
}{
"no-error": {
msg: "hello world",
expectLog: `"hello world" logger="UnhandledError"`,
},
"complex": {
err: errors.New("fake error"),
msg: "ignore",
keysAndValues: []interface{}{"a", 1, "b", "c"},
expectLog: `"ignore" err="fake error" logger="UnhandledError" a=1 b="c"`,
},
} {
t.Run(name, func(t *testing.T) {
var buffer bytes.Buffer
timeInUTC := time.Date(2009, 12, 1, 13, 30, 40, 42000, time.UTC)
timeString := "1201 13:30:40.000042"
logger := textlogger.NewLogger(textlogger.NewConfig(
textlogger.FixedTime(timeInUTC),
textlogger.Output(&buffer),
))
_, _, line, _ := runtime.Caller(0)
handleError(logger, tc.err, tc.msg, tc.keysAndValues...)
assert.Equal(t, fmt.Sprintf("E%s %7d runtime_test.go:%d] %s\n", timeString, os.Getpid(), line+1, tc.expectLog), buffer.String())
})
}
})
}
}