From b607ca1bf3e1cf6152c446ea61ac7fdd9014e1f1 Mon Sep 17 00:00:00 2001 From: Abu Kashem Date: Tue, 2 Mar 2021 19:27:43 -0500 Subject: [PATCH] apiserver: improve correlation by using the audit ID - when we forward the request to the aggregated server, set the audit ID in the new request header. This allows audit logs from aggregated apiservers to be correlated with the kube-apiserver. - use the audit ID in the current tracer - use the audit ID in httplog - when a request panics, log an error with the audit ID. --- .../pkg/endpoints/handlers/helpers.go | 15 +++++++ .../pkg/endpoints/handlers/trace_util.go | 1 + .../pkg/server/filters/timeout_test.go | 4 +- .../apiserver/pkg/server/filters/wrap.go | 4 +- .../apiserver/pkg/server/httplog/httplog.go | 4 ++ .../pkg/apiserver/handler_proxy.go | 7 ++++ .../pkg/apiserver/handler_proxy_test.go | 41 +++++++++++++++++++ 7 files changed, 72 insertions(+), 4 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/helpers.go b/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/helpers.go index 244a3fd0a51..3fb7beccad1 100644 --- a/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/helpers.go +++ b/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/helpers.go @@ -20,6 +20,7 @@ import ( "net/http" utilnet "k8s.io/apimachinery/pkg/util/net" + "k8s.io/apiserver/pkg/endpoints/request" ) const ( @@ -73,3 +74,17 @@ func (lazy *lazyAccept) String() string { return "unknown" } + +// lazyAuditID implements Stringer interface to lazily retrieve +// the audit ID associated with the request. +type lazyAuditID struct { + req *http.Request +} + +func (lazy *lazyAuditID) String() string { + if lazy.req != nil { + return request.GetAuditIDTruncated(lazy.req) + } + + return "unknown" +} diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/trace_util.go b/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/trace_util.go index 69b41fac4e3..8d7c4e1b526 100644 --- a/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/trace_util.go +++ b/staging/src/k8s.io/apiserver/pkg/endpoints/handlers/trace_util.go @@ -26,6 +26,7 @@ func traceFields(req *http.Request) []utiltrace.Field { return []utiltrace.Field{ {Key: "url", Value: req.URL.Path}, {Key: "user-agent", Value: &lazyTruncatedUserAgent{req: req}}, + {Key: "audit-id", Value: &lazyAuditID{req: req}}, {Key: "client", Value: &lazyClientIP{req: req}}, {Key: "accept", Value: &lazyAccept{req: req}}, {Key: "protocol", Value: req.Proto}} diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go b/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go index faf8c1ad81a..1cd24358413 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go @@ -263,7 +263,7 @@ func TestErrConnKilled(t *testing.T) { if strings.Count(capturedOutput, "\n") != 1 { t.Errorf("unexpected output captured actual = %v", capturedOutput) } - if !strings.Contains(capturedOutput, `timeout or abort while handling: GET "/"`) { + if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) { t.Errorf("unexpected output captured actual = %v", capturedOutput) } } @@ -353,7 +353,7 @@ func TestErrConnKilledHTTP2(t *testing.T) { if strings.Count(capturedOutput, "\n") != 1 { t.Errorf("unexpected output captured actual = %v", capturedOutput) } - if !strings.Contains(capturedOutput, `timeout or abort while handling: GET "/"`) { + if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) { t.Errorf("unexpected output captured actual = %v", capturedOutput) } diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go b/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go index 34c5398dba9..408011fc6c0 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go @@ -51,11 +51,11 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolve // This call can have different handlers, but the default chain rate limits. Call it after the metrics are updated // in case the rate limit delays it. If you outrun the rate for this one timed out requests, something has gone // seriously wrong with your server, but generally having a logging signal for timeouts is useful. - runtime.HandleError(fmt.Errorf("timeout or abort while handling: %v %q", req.Method, req.URL.Path)) + runtime.HandleError(fmt.Errorf("timeout or abort while handling: method=%v URI=%q audit-ID=%q", req.Method, req.RequestURI, request.GetAuditIDTruncated(req))) return } http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError) - klog.Errorf("apiserver panic'd on %v %v", req.Method, req.RequestURI) + klog.ErrorS(nil, "apiserver panic'd", "method", req.Method, "URI", req.RequestURI, "audit-ID", request.GetAuditIDTruncated(req)) }) } diff --git a/staging/src/k8s.io/apiserver/pkg/server/httplog/httplog.go b/staging/src/k8s.io/apiserver/pkg/server/httplog/httplog.go index 4cb5306672b..af205fade6c 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/httplog/httplog.go +++ b/staging/src/k8s.io/apiserver/pkg/server/httplog/httplog.go @@ -25,6 +25,7 @@ import ( "runtime" "time" + "k8s.io/apiserver/pkg/endpoints/request" "k8s.io/klog/v2" ) @@ -157,12 +158,14 @@ func (rl *respLogger) Addf(format string, data ...interface{}) { func (rl *respLogger) LogArgs() []interface{} { latency := time.Since(rl.startTime) + auditID := request.GetAuditIDTruncated(rl.req) if rl.hijacked { return []interface{}{ "verb", rl.req.Method, "URI", rl.req.RequestURI, "latency", latency, "userAgent", rl.req.UserAgent(), + "audit-ID", auditID, "srcIP", rl.req.RemoteAddr, "hijacked", true, } @@ -172,6 +175,7 @@ func (rl *respLogger) LogArgs() []interface{} { "URI", rl.req.RequestURI, "latency", latency, "userAgent", rl.req.UserAgent(), + "audit-ID", auditID, "srcIP", rl.req.RemoteAddr, "resp", rl.status, } diff --git a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go index e09f0d1a238..0c7588ab17c 100644 --- a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go +++ b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/util/httpstream/spdy" utilnet "k8s.io/apimachinery/pkg/util/net" "k8s.io/apimachinery/pkg/util/proxy" + auditinternal "k8s.io/apiserver/pkg/apis/audit" "k8s.io/apiserver/pkg/endpoints/handlers/responsewriters" endpointmetrics "k8s.io/apiserver/pkg/endpoints/metrics" genericapirequest "k8s.io/apiserver/pkg/endpoints/request" @@ -201,6 +202,12 @@ func newRequestForProxy(location *url.URL, req *http.Request) (*http.Request, co newReq.URL = location newReq.Host = location.Host + // If the original request has an audit ID, let's make sure we propagate this + // to the aggregated server. + if auditID, found := genericapirequest.AuditIDFrom(req.Context()); found { + newReq.Header.Set(auditinternal.HeaderAuditID, string(auditID)) + } + return newReq, cancelFn } diff --git a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy_test.go b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy_test.go index 95282152b85..30fa28b2308 100644 --- a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy_test.go +++ b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy_test.go @@ -36,6 +36,7 @@ import ( "golang.org/x/net/websocket" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/proxy" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apiserver/pkg/authentication/user" @@ -577,6 +578,46 @@ func TestGetContextForNewRequest(t *testing.T) { } +func TestNewRequestForProxyWithAuditID(t *testing.T) { + tests := []struct { + name string + auditID string + }{ + { + name: "original request has Audit-ID", + auditID: "foo-bar", + }, + { + name: "original request does not have Audit-ID", + auditID: "", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + req, err := http.NewRequest(http.MethodGet, "/api/group/version/foos/namespace/foo", nil) + if err != nil { + t.Fatalf("failed to create new http request - %v", err) + } + + req = req.WithContext(genericapirequest.WithRequestInfo(req.Context(), &genericapirequest.RequestInfo{Path: req.URL.Path})) + if len(test.auditID) > 0 { + req = req.WithContext(genericapirequest.WithAuditID(req.Context(), types.UID(test.auditID))) + } + + newReq, _ := newRequestForProxy(req.URL, req) + if newReq == nil { + t.Fatal("expected a non nil Request object") + } + + auditIDGot := newReq.Header.Get("Audit-ID") + if test.auditID != auditIDGot { + t.Errorf("expected an Audit-ID value: %q, but got: %q", test.auditID, auditIDGot) + } + }) + } +} + // TestProxyCertReload verifies that the proxy reloading of certificates work // to be able to test the reloading it starts a server with client auth enabled // it first uses certs that does not match the client CA so the verification fails - expecting HTTP 503