k8s.io/apimachinery: add HandleCrashWithLogger and HandleErrorWithLogger

There are situations where it makes more sense to pass a logger through a call chain, primarily because passing a context would imply that the call chain should honor cancellation even though there is a different shutdown mechanism. Using the *WithContext variants would cause additional overhead for klog.NewContext, which hurts in particular for HandleCrash because that function is typically a nop that doesn't actually need to log anything. HandleCrashWithLogger avoids that overhead. For HandleError that is less relevant because it always logs, but for the sake of symmetry it also gets added. Putting klog.Logger (= logr.Logger) into the public Kubernetes Go API is okay because it's no longer realistic that these packages can ever drop the klog dependency. Callers using slog as logger in their binary can use https://github.com/veqryn/slog-context to store a slog.Logger in a context and then call the *WithContext variants, klog.FromContext will be able to use it. This is probably very rare, so there's no need for *WithSlog variants. While at it, unit testing gets enhanced and logging in panic handlers gets improved such that they are guaranteed to get a saner location when not doing any caller skipping. Previously, this was undefined.
2025-08-12 05:21:58 +00:00 · 2024-11-28 15:19:58 +01:00 · 2024-11-28 15:19:58 +01:00 · e3c584030c
commit e3c584030c
parent 95d71c464a
2 changed files with 164 additions and 9 deletions
--- a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go
+++ b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go
@ -36,6 +36,11 @@ var (
 )

 // PanicHandlers is a list of functions which will be invoked when a panic happens.
+//
+// The code invoking these handlers prepares a contextual logger so that
+// klog.FromContext(ctx) already skips over the panic handler itself and
+// several other intermediate functions, ideally such that the log output
+// is attributed to the code which triggered the panic.
 var PanicHandlers = []func(context.Context, interface{}){logPanic}

 // HandleCrash simply catches a crash and logs an error. Meant to be called via
@ -45,7 +50,7 @@ var PanicHandlers = []func(context.Context, interface{}){logPanic}
 //
 // E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
 //
-// Contextual logging: HandleCrashWithContext should be used instead of HandleCrash in code which supports contextual logging.
+// Contextual logging: HandleCrashWithContext or HandleCrashWithLogger should be used instead of HandleCrash in code which supports contextual logging.
 func HandleCrash(additionalHandlers ...func(interface{})) {
 	if r := recover(); r != nil {
 		additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers))
@ -74,10 +79,30 @@ func HandleCrashWithContext(ctx context.Context, additionalHandlers ...func(cont
 	}
 }

-// handleCrash is the common implementation of HandleCrash and HandleCrash.
+// HandleCrashWithLogger simply catches a crash and logs an error. Meant to be called via
+// defer.  Additional context-specific handlers can be provided, and will be
+// called in case of panic.  HandleCrash actually crashes, after calling the
+// handlers and logging the panic message.
+//
+// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
+func HandleCrashWithLogger(logger klog.Logger, additionalHandlers ...func(context.Context, interface{})) {
+	if r := recover(); r != nil {
+		ctx := klog.NewContext(context.Background(), logger)
+		handleCrash(ctx, r, additionalHandlers...)
+	}
+}
+
+// handleCrash is the common implementation of the HandleCrash* variants.
 // Having those call a common implementation ensures that the stack depth
 // is the same regardless through which path the handlers get invoked.
 func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) {
+	// We don't really know how many call frames to skip because the Go
+	// panic handler is between us and the code where the panic occurred.
+	// If it's one function (as in Go 1.21), then skipping four levels
+	// gets us to the function which called the `defer HandleCrashWithontext(...)`.
+	logger := klog.FromContext(ctx).WithCallDepth(4)
+	ctx = klog.NewContext(ctx, logger)
+
 	for _, fn := range PanicHandlers {
 		fn(ctx, r)
 	}
@ -106,11 +131,7 @@ func logPanic(ctx context.Context, r interface{}) {
 	stacktrace := make([]byte, size)
 	stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]

-	// We don't really know how many call frames to skip because the Go
-	// panic handler is between us and the code where the panic occurred.
-	// If it's one function (as in Go 1.21), then skipping four levels
-	// gets us to the function which called the `defer HandleCrashWithontext(...)`.
-	logger := klog.FromContext(ctx).WithCallDepth(4)
+	logger := klog.FromContext(ctx)

 	// For backwards compatibility, conversion to string
 	// is handled here instead of defering to the logging
@ -176,12 +197,19 @@ func HandleError(err error) {
 // and key/value pairs.
 //
 // This variant should be used instead of HandleError because it supports
-// structured, contextual logging.
+// structured, contextual logging. Alternatively, [HandleErrorWithLogger] can
+// be used if a logger is available instead of a context.
 func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
 	handleError(ctx, err, msg, keysAndValues...)
 }

-// handleError is the common implementation of HandleError and HandleErrorWithContext.
+// HandleErrorWithLogger is an alternative to [HandlerErrorWithContext] which accepts
+// a logger for contextual logging.
+func HandleErrorWithLogger(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
+	handleError(klog.NewContext(context.Background(), logger), err, msg, keysAndValues...)
+}
+
+// handleError is the common implementation of the HandleError* variants.
 // Using this common implementation ensures that the stack depth
 // is the same regardless through which path the handlers get invoked.
 func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) {
--- a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go
+++ b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go
@ -19,15 +19,22 @@ package runtime
 import (
 	"bytes"
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
 	"os"
 	"regexp"
+	"runtime"
 	"strings"
 	"sync"
 	"testing"
 	"time"
+
+	"github.com/stretchr/testify/assert"
+
+	"k8s.io/klog/v2"
+	"k8s.io/klog/v2/textlogger"
 )

 func TestHandleCrash(t *testing.T) {
@ -36,6 +43,7 @@ func TestHandleCrash(t *testing.T) {
 			t.Errorf("Expected a panic to recover from")
 		}
 	}()
+	//nolint:logcheck // Intentionally uses the old API.
 	defer HandleCrash()
 	panic("Test Panic")
 }
@ -55,6 +63,7 @@ func TestCustomHandleCrash(t *testing.T) {
 				t.Errorf("Expected a panic to recover from")
 			}
 		}()
+		//nolint:logcheck // Intentionally uses the old API.
 		defer HandleCrash()
 		panic("test")
 	}()
@ -73,6 +82,7 @@ func TestCustomHandleError(t *testing.T) {
 		},
 	}
 	err := fmt.Errorf("test")
+	//nolint:logcheck // Intentionally uses the old API.
 	HandleError(err)
 	if result != err {
 		t.Errorf("did not receive custom handler")
@ -86,6 +96,7 @@ func TestHandleCrashLog(t *testing.T) {
 				t.Fatalf("expected a panic to recover from")
 			}
 		}()
+		//nolint:logcheck // Intentionally uses the old API.
 		defer HandleCrash()
 		panic("test panic")
 	})
@ -119,6 +130,72 @@ func TestHandleCrashLog(t *testing.T) {
 	}
 }

+func TestHandleCrashContextual(t *testing.T) {
+	for name, handleCrash := range map[string]func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})){
+		"WithLogger": func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})) {
+			logger = logger.WithCallDepth(2) // This function *and* the trigger helper.
+			defer HandleCrashWithLogger(logger, additionalHandlers...)
+			trigger()
+		},
+		"WithContext": func(logger klog.Logger, trigger func(), additionalHandlers ...func(context.Context, interface{})) {
+			logger = logger.WithCallDepth(2)
+			defer HandleCrashWithContext(klog.NewContext(context.Background(), logger), additionalHandlers...)
+			trigger()
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			for name, tt := range map[string]struct {
+				trigger     func()
+				expectPanic string
+			}{
+				"no-panic": {
+					trigger:     func() {},
+					expectPanic: "",
+				},
+				"string-panic": {
+					trigger:     func() { panic("fake") },
+					expectPanic: "fake",
+				},
+				"int-panic": {
+					trigger:     func() { panic(42) },
+					expectPanic: "42",
+				},
+			} {
+				t.Run(name, func(t *testing.T) {
+					var buffer bytes.Buffer
+					timeInUTC := time.Date(2009, 12, 1, 13, 30, 40, 42000, time.UTC)
+					timeString := "1201 13:30:40.000042"
+					logger := textlogger.NewLogger(textlogger.NewConfig(
+						textlogger.FixedTime(timeInUTC),
+						textlogger.Output(&buffer),
+					))
+					ReallyCrash = false
+					defer func() { ReallyCrash = true }()
+
+					handler := func(ctx context.Context, r interface{}) {
+						// Same formatting as in HandleCrash.
+						str, ok := r.(string)
+						if !ok {
+							str = fmt.Sprintf("%v", r)
+						}
+						klog.FromContext(ctx).Info("handler called", "panic", str)
+					}
+
+					_, _, line, _ := runtime.Caller(0)
+					handleCrash(logger, tt.trigger, handler)
+					if tt.expectPanic != "" {
+						assert.Contains(t, buffer.String(), fmt.Sprintf(`E%s %7d runtime_test.go:%d] "Observed a panic" panic=%q`, timeString, os.Getpid(), line+1, tt.expectPanic))
+						assert.Contains(t, buffer.String(), fmt.Sprintf(`I%s %7d runtime_test.go:%d] "handler called" panic=%q
+`, timeString, os.Getpid(), line+1, tt.expectPanic))
+					} else {
+						assert.Empty(t, buffer.String())
+					}
+				})
+			}
+		})
+	}
+}
+
 func TestHandleCrashLogSilenceHTTPErrAbortHandler(t *testing.T) {
 	log, err := captureStderr(func() {
 		defer func() {
@ -126,6 +203,7 @@ func TestHandleCrashLogSilenceHTTPErrAbortHandler(t *testing.T) {
 				t.Fatalf("expected to recover from http.ErrAbortHandler")
 			}
 		}()
+		//nolint:logcheck // Intentionally uses the old API.
 		defer HandleCrash()
 		panic(http.ErrAbortHandler)
 	})
@ -184,3 +262,52 @@ func Test_rudimentaryErrorBackoff_OnError_ParallelSleep(t *testing.T) {
 		t.Errorf("OnError slept for too long: %s", since)
 	}
 }
+
+func TestHandleError(t *testing.T) {
+	for name, handleError := range map[string]func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}){
+		"WithLogger": func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
+			helper, logger := logger.WithCallStackHelper()
+			helper()
+			HandleErrorWithLogger(logger, err, msg, keysAndValues...)
+		},
+		"WithContext": func(logger klog.Logger, err error, msg string, keysAndValues ...interface{}) {
+			helper, logger := logger.WithCallStackHelper()
+			helper()
+			HandleErrorWithContext(klog.NewContext(context.Background(), logger), err, msg, keysAndValues...)
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			for name, tc := range map[string]struct {
+				err           error
+				msg           string
+				keysAndValues []interface{}
+				expectLog     string
+			}{
+				"no-error": {
+					msg:       "hello world",
+					expectLog: `"hello world" logger="UnhandledError"`,
+				},
+				"complex": {
+					err:           errors.New("fake error"),
+					msg:           "ignore",
+					keysAndValues: []interface{}{"a", 1, "b", "c"},
+					expectLog:     `"ignore" err="fake error" logger="UnhandledError" a=1 b="c"`,
+				},
+			} {
+				t.Run(name, func(t *testing.T) {
+					var buffer bytes.Buffer
+					timeInUTC := time.Date(2009, 12, 1, 13, 30, 40, 42000, time.UTC)
+					timeString := "1201 13:30:40.000042"
+					logger := textlogger.NewLogger(textlogger.NewConfig(
+						textlogger.FixedTime(timeInUTC),
+						textlogger.Output(&buffer),
+					))
+
+					_, _, line, _ := runtime.Caller(0)
+					handleError(logger, tc.err, tc.msg, tc.keysAndValues...)
+					assert.Equal(t, fmt.Sprintf("E%s %7d runtime_test.go:%d] %s\n", timeString, os.Getpid(), line+1, tc.expectLog), buffer.String())
+				})
+			}
+		})
+	}
+}