diff --git a/hack/golangci-hints.yaml b/hack/golangci-hints.yaml index cd98978d137..3b30ada8e02 100644 --- a/hack/golangci-hints.yaml +++ b/hack/golangci-hints.yaml @@ -127,6 +127,7 @@ linters-settings: # please keep this alphabetized # The following packages have been migrated to contextual logging. # Packages matched here do not have to be listed above because # "contextual" implies "structured". + contextual k8s.io/apimachinery/pkg/util/runtime/.* contextual k8s.io/client-go/metadata/.* contextual k8s.io/client-go/tools/events/.* contextual k8s.io/client-go/tools/record/.* diff --git a/hack/golangci-strict.yaml b/hack/golangci-strict.yaml index ca7f524889a..7d9615a1473 100644 --- a/hack/golangci-strict.yaml +++ b/hack/golangci-strict.yaml @@ -173,6 +173,7 @@ linters-settings: # please keep this alphabetized # The following packages have been migrated to contextual logging. # Packages matched here do not have to be listed above because # "contextual" implies "structured". + contextual k8s.io/apimachinery/pkg/util/runtime/.* contextual k8s.io/client-go/metadata/.* contextual k8s.io/client-go/tools/events/.* contextual k8s.io/client-go/tools/record/.* diff --git a/hack/golangci.yaml b/hack/golangci.yaml index 94de6bcf3f8..c099e20a6c8 100644 --- a/hack/golangci.yaml +++ b/hack/golangci.yaml @@ -176,6 +176,7 @@ linters-settings: # please keep this alphabetized # The following packages have been migrated to contextual logging. # Packages matched here do not have to be listed above because # "contextual" implies "structured". + contextual k8s.io/apimachinery/pkg/util/runtime/.* contextual k8s.io/client-go/metadata/.* contextual k8s.io/client-go/tools/events/.* contextual k8s.io/client-go/tools/record/.* diff --git a/hack/logcheck.conf b/hack/logcheck.conf index ffb490368af..bee1fdc1026 100644 --- a/hack/logcheck.conf +++ b/hack/logcheck.conf @@ -24,6 +24,7 @@ structured k8s.io/apiserver/pkg/server/options/encryptionconfig/.* # The following packages have been migrated to contextual logging. # Packages matched here do not have to be listed above because # "contextual" implies "structured". +contextual k8s.io/apimachinery/pkg/util/runtime/.* contextual k8s.io/client-go/metadata/.* contextual k8s.io/client-go/tools/events/.* contextual k8s.io/client-go/tools/record/.* diff --git a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go index 3674914f701..4fe0c5eb250 100644 --- a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go +++ b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime.go @@ -17,6 +17,7 @@ limitations under the License. package runtime import ( + "context" "fmt" "net/http" "runtime" @@ -35,7 +36,7 @@ var ( ) // PanicHandlers is a list of functions which will be invoked when a panic happens. -var PanicHandlers = []func(interface{}){logPanic} +var PanicHandlers = []func(context.Context, interface{}){logPanic} // HandleCrash simply catches a crash and logs an error. Meant to be called via // defer. Additional context-specific handlers can be provided, and will be @@ -43,23 +44,54 @@ var PanicHandlers = []func(interface{}){logPanic} // handlers and logging the panic message. // // E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully. +// +// TODO(pohly): logcheck:context // HandleCrashWithContext should be used instead of HandleCrash in code which supports contextual logging. func HandleCrash(additionalHandlers ...func(interface{})) { if r := recover(); r != nil { - for _, fn := range PanicHandlers { - fn(r) - } - for _, fn := range additionalHandlers { - fn(r) - } - if ReallyCrash { - // Actually proceed to panic. - panic(r) + additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers)) + for i, handler := range additionalHandlers { + handler := handler // capture loop variable + additionalHandlersWithContext[i] = func(_ context.Context, r interface{}) { + handler(r) + } } + + handleCrash(context.Background(), r, additionalHandlersWithContext...) + } +} + +// HandleCrashWithContext simply catches a crash and logs an error. Meant to be called via +// defer. Additional context-specific handlers can be provided, and will be +// called in case of panic. HandleCrash actually crashes, after calling the +// handlers and logging the panic message. +// +// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully. +// +// The context is used to determine how to log. +func HandleCrashWithContext(ctx context.Context, additionalHandlers ...func(context.Context, interface{})) { + if r := recover(); r != nil { + handleCrash(ctx, r, additionalHandlers...) + } +} + +// handleCrash is the common implementation of HandleCrash and HandleCrash. +// Having those call a common implementation ensures that the stack depth +// is the same regardless through which path the handlers get invoked. +func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) { + for _, fn := range PanicHandlers { + fn(ctx, r) + } + for _, fn := range additionalHandlers { + fn(ctx, r) + } + if ReallyCrash { + // Actually proceed to panic. + panic(r) } } // logPanic logs the caller tree when a panic occurs (except in the special case of http.ErrAbortHandler). -func logPanic(r interface{}) { +func logPanic(ctx context.Context, r interface{}) { if r == http.ErrAbortHandler { // honor the http.ErrAbortHandler sentinel panic value: // ErrAbortHandler is a sentinel panic value to abort a handler. @@ -73,10 +105,20 @@ func logPanic(r interface{}) { const size = 64 << 10 stacktrace := make([]byte, size) stacktrace = stacktrace[:runtime.Stack(stacktrace, false)] + + // We don't really know how many call frames to skip because the Go + // panic handler is between us and the code where the panic occurred. + // If it's one function (as in Go 1.21), then skipping four levels + // gets us to the function which called the `defer HandleCrashWithontext(...)`. + logger := klog.FromContext(ctx).WithCallDepth(4) + + // For backwards compatibility, conversion to string + // is handled here instead of defering to the logging + // backend. if _, ok := r.(string); ok { - klog.Errorf("Observed a panic: %s\n%s", r, stacktrace) + logger.Error(nil, "Observed a panic", "panic", r, "stacktrace", string(stacktrace)) } else { - klog.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace) + logger.Error(nil, "Observed a panic", "panic", fmt.Sprintf("%v", r), "panicGoValue", fmt.Sprintf("%#v", r), "stacktrace", string(stacktrace)) } } @@ -84,35 +126,76 @@ func logPanic(r interface{}) { // error occurs. // TODO(lavalamp): for testability, this and the below HandleError function // should be packaged up into a testable and reusable object. -var ErrorHandlers = []func(error){ +var ErrorHandlers = []ErrorHandler{ logError, - (&rudimentaryErrorBackoff{ - lastErrorTime: time.Now(), - // 1ms was the number folks were able to stomach as a global rate limit. - // If you need to log errors more than 1000 times a second you - // should probably consider fixing your code instead. :) - minPeriod: time.Millisecond, - }).OnError, + func(_ context.Context, _ error, _ string, _ ...interface{}) { + (&rudimentaryErrorBackoff{ + lastErrorTime: time.Now(), + // 1ms was the number folks were able to stomach as a global rate limit. + // If you need to log errors more than 1000 times a second you + // should probably consider fixing your code instead. :) + minPeriod: time.Millisecond, + }).OnError() + }, } +type ErrorHandler func(ctx context.Context, err error, msg string, keysAndValues ...interface{}) + // HandlerError is a method to invoke when a non-user facing piece of code cannot // return an error and needs to indicate it has been ignored. Invoking this method // is preferable to logging the error - the default behavior is to log but the // errors may be sent to a remote server for analysis. +// +// TODO(pohly): logcheck:context // HandleErrorWithContext should be used instead of HandleError in code which supports contextual logging. func HandleError(err error) { // this is sometimes called with a nil error. We probably shouldn't fail and should do nothing instead if err == nil { return } + handleError(context.Background(), err, "Unhandled Error") +} + +// HandlerErrorWithContext is a method to invoke when a non-user facing piece of code cannot +// return an error and needs to indicate it has been ignored. Invoking this method +// is preferable to logging the error - the default behavior is to log but the +// errors may be sent to a remote server for analysis. The context is used to +// determine how to log the error. +// +// If contextual logging is enabled, the default log output is equivalent to +// +// logr.FromContext(ctx).WithName("UnhandledError").Error(err, msg, keysAndValues...) +// +// Without contextual logging, it is equivalent to: +// +// klog.ErrorS(err, msg, keysAndValues...) +// +// In contrast to HandleError, passing nil for the error is still going to +// trigger a log entry. Don't construct a new error or wrap an error +// with fmt.Errorf. Instead, add additional information via the mssage +// and key/value pairs. +// +// This variant should be used instead of HandleError because it supports +// structured, contextual logging. +func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { + handleError(ctx, err, msg, keysAndValues...) +} + +// handleError is the common implementation of HandleError and HandleErrorWithContext. +// Using this common implementation ensures that the stack depth +// is the same regardless through which path the handlers get invoked. +func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { for _, fn := range ErrorHandlers { - fn(err) + fn(ctx, err, msg, keysAndValues...) } } -// logError prints an error with the call stack of the location it was reported -func logError(err error) { - klog.ErrorDepth(2, err) +// logError prints an error with the call stack of the location it was reported. +// It expects to be called as -> HandleError[WithContext] -> handleError -> logError. +func logError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { + logger := klog.FromContext(ctx).WithCallDepth(3) + logger = klog.LoggerWithName(logger, "UnhandledError") + logger.Error(err, msg, keysAndValues...) //nolint:logcheck // logcheck complains about unknown key/value pairs. } type rudimentaryErrorBackoff struct { @@ -125,7 +208,7 @@ type rudimentaryErrorBackoff struct { // OnError will block if it is called more often than the embedded period time. // This will prevent overly tight hot error loops. -func (r *rudimentaryErrorBackoff) OnError(error) { +func (r *rudimentaryErrorBackoff) OnError() { now := time.Now() // start the timer before acquiring the lock r.lastErrorTimeLock.Lock() d := now.Sub(r.lastErrorTime) diff --git a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_stack_test.go b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_stack_test.go new file mode 100644 index 00000000000..380ad5b1a53 --- /dev/null +++ b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_stack_test.go @@ -0,0 +1,71 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package runtime + +import ( + "bytes" + "context" + "errors" + "flag" + "fmt" + "regexp" + + "k8s.io/klog/v2" +) + +//nolint:logcheck // Several functions are normally not okay in a package. +func ExampleHandleErrorWithContext() { + state := klog.CaptureState() + defer state.Restore() + var fs flag.FlagSet + klog.InitFlags(&fs) + for flag, value := range map[string]string{ + "one_output": "true", + "logtostderr": "false", + } { + if err := fs.Set(flag, value); err != nil { + fmt.Printf("Unexpected error configuring klog: %v", err) + return + } + } + var buffer bytes.Buffer + klog.SetOutput(&buffer) + + logger := klog.Background() + logger = klog.LoggerWithValues(logger, "request", 42) + ctx := klog.NewContext(context.Background(), logger) + + // The line number of the next call must be at line 60. Here are some + // blank lines that can be removed to keep the line unchanged. + // + // + // + // + // + // + HandleErrorWithContext(ctx, errors.New("fake error"), "test") + + klog.Flush() + // Strip varying header. Code location should be constant and something + // that needs to be tested. + output := buffer.String() + output = regexp.MustCompile(`^.* ([^[:space:]]*.go:[[:digit:]]*)\] `).ReplaceAllString(output, `xxx $1] `) + fmt.Print(output) + + // Output: + // xxx runtime_stack_test.go:60] "test" err="fake error" logger="UnhandledError" request=42 +} diff --git a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go index c886b6826ff..77e6edbd4e1 100644 --- a/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go +++ b/staging/src/k8s.io/apimachinery/pkg/util/runtime/runtime_test.go @@ -18,6 +18,7 @@ package runtime import ( "bytes" + "context" "fmt" "io" "net/http" @@ -43,8 +44,8 @@ func TestCustomHandleCrash(t *testing.T) { old := PanicHandlers defer func() { PanicHandlers = old }() var result interface{} - PanicHandlers = []func(interface{}){ - func(r interface{}) { + PanicHandlers = []func(context.Context, interface{}){ + func(_ context.Context, r interface{}) { result = r }, } @@ -66,8 +67,8 @@ func TestCustomHandleError(t *testing.T) { old := ErrorHandlers defer func() { ErrorHandlers = old }() var result error - ErrorHandlers = []func(error){ - func(err error) { + ErrorHandlers = []ErrorHandler{ + func(_ context.Context, err error, msg string, keysAndValues ...interface{}) { result = err }, } @@ -101,7 +102,8 @@ func TestHandleCrashLog(t *testing.T) { if len(lines) < 4 { t.Fatalf("panic log should have 1 line of message, 1 line per goroutine and 2 lines per function call") } - if match, _ := regexp.MatchString("Observed a panic: test panic", lines[0]); !match { + t.Logf("Got log output:\n%s", strings.Join(lines, "\n")) + if match, _ := regexp.MatchString(`"Observed a panic" panic="test panic"`, lines[0]); !match { t.Errorf("mismatch panic message: %s", lines[0]) } // The following regexp's verify that Kubernetes panic log matches Golang stdlib @@ -170,7 +172,7 @@ func Test_rudimentaryErrorBackoff_OnError_ParallelSleep(t *testing.T) { wg.Add(1) go func() { <-start - r.OnError(nil) // input error is ignored + r.OnError() wg.Done() }() } diff --git a/staging/src/k8s.io/apimachinery/pkg/util/wait/wait_test.go b/staging/src/k8s.io/apimachinery/pkg/util/wait/wait_test.go index c8dd0bf58b3..f05decdec41 100644 --- a/staging/src/k8s.io/apimachinery/pkg/util/wait/wait_test.go +++ b/staging/src/k8s.io/apimachinery/pkg/util/wait/wait_test.go @@ -194,8 +194,8 @@ func TestJitterUntilRecoversPanic(t *testing.T) { // Hook up a custom crash handler to ensure it is called when a jitter function panics runtime.ReallyCrash = false - runtime.PanicHandlers = []func(interface{}){ - func(p interface{}) { + runtime.PanicHandlers = []func(context.Context, interface{}){ + func(_ context.Context, p interface{}) { handled++ }, } diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go b/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go index 432851a631c..e9438732213 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go @@ -137,9 +137,7 @@ func (t *timeoutHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } metrics.RecordRequestPostTimeout(metrics.PostTimeoutSourceTimeoutHandler, status) - err := fmt.Errorf("post-timeout activity - time-elapsed: %s, %v %q result: %v", - time.Since(timedOutAt), r.Method, r.URL.Path, res) - utilruntime.HandleError(err) + utilruntime.HandleErrorWithContext(r.Context(), nil, "Post-timeout activity", "timeElapsed", time.Since(timedOutAt), "method", r.Method, "path", r.URL.Path, "result", res) }() }() httplog.SetStacktracePredicate(r.Context(), func(status int) bool { diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go b/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go index 78b4ba7a89b..ffb15926bd4 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go @@ -408,7 +408,9 @@ func TestErrConnKilled(t *testing.T) { if isStackTraceLoggedByRuntime(capturedOutput) { t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput) } - if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) { + // For the sake of simplicity and clarity this matches the full log line. + // This is not part of the Kubernetes API and could change. + if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) { t.Errorf("unexpected output captured actual = %v", capturedOutput) } } @@ -501,7 +503,9 @@ func TestErrConnKilledHTTP2(t *testing.T) { if isStackTraceLoggedByRuntime(capturedOutput) { t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput) } - if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) { + // For the sake of simplicity and clarity this matches the full log line. + // This is not part of the Kubernetes API and could change. + if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) { t.Errorf("unexpected output captured actual = %v", capturedOutput) } diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go b/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go index a981eae78a8..73ce2702604 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go @@ -17,7 +17,6 @@ limitations under the License. package filters import ( - "fmt" "net/http" "k8s.io/apimachinery/pkg/util/runtime" @@ -51,7 +50,7 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolve // This call can have different handlers, but the default chain rate limits. Call it after the metrics are updated // in case the rate limit delays it. If you outrun the rate for this one timed out requests, something has gone // seriously wrong with your server, but generally having a logging signal for timeouts is useful. - runtime.HandleError(fmt.Errorf("timeout or abort while handling: method=%v URI=%q audit-ID=%q", req.Method, req.RequestURI, audit.GetAuditIDTruncated(req.Context()))) + runtime.HandleErrorWithContext(req.Context(), nil, "Timeout or abort while handling", "method", req.Method, "URI", req.RequestURI, "auditID", audit.GetAuditIDTruncated(req.Context())) return } http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError) diff --git a/staging/src/k8s.io/apiserver/pkg/util/proxy/websocket.go b/staging/src/k8s.io/apiserver/pkg/util/proxy/websocket.go index 3b9746b3b2f..798ce176724 100644 --- a/staging/src/k8s.io/apiserver/pkg/util/proxy/websocket.go +++ b/staging/src/k8s.io/apiserver/pkg/util/proxy/websocket.go @@ -79,7 +79,7 @@ func webSocketServerStreams(req *http.Request, w http.ResponseWriter, opts Optio if p := recover(); p != nil { // Standard panic logging. for _, fn := range runtime.PanicHandlers { - fn(p) + fn(req.Context(), p) } } }()