apiserver: improve correlation by using the audit ID

- when we forward the request to the aggregated server, set the audit
  ID in the new request header. This allows audit logs from aggregated
  apiservers to be correlated with the kube-apiserver.
- use the audit ID in the current tracer
- use the audit ID in httplog
- when a request panics, log an error with the audit ID.
This commit is contained in:
Abu Kashem 2021-03-02 19:27:43 -05:00
parent 31653bacb9
commit b607ca1bf3
No known key found for this signature in database
GPG Key ID: 33A4FA7088DB68A9
7 changed files with 72 additions and 4 deletions

View File

@ -20,6 +20,7 @@ import (
"net/http"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apiserver/pkg/endpoints/request"
)
const (
@ -73,3 +74,17 @@ func (lazy *lazyAccept) String() string {
return "unknown"
}
// lazyAuditID implements Stringer interface to lazily retrieve
// the audit ID associated with the request.
type lazyAuditID struct {
req *http.Request
}
func (lazy *lazyAuditID) String() string {
if lazy.req != nil {
return request.GetAuditIDTruncated(lazy.req)
}
return "unknown"
}

View File

@ -26,6 +26,7 @@ func traceFields(req *http.Request) []utiltrace.Field {
return []utiltrace.Field{
{Key: "url", Value: req.URL.Path},
{Key: "user-agent", Value: &lazyTruncatedUserAgent{req: req}},
{Key: "audit-id", Value: &lazyAuditID{req: req}},
{Key: "client", Value: &lazyClientIP{req: req}},
{Key: "accept", Value: &lazyAccept{req: req}},
{Key: "protocol", Value: req.Proto}}

View File

@ -263,7 +263,7 @@ func TestErrConnKilled(t *testing.T) {
if strings.Count(capturedOutput, "\n") != 1 {
t.Errorf("unexpected output captured actual = %v", capturedOutput)
}
if !strings.Contains(capturedOutput, `timeout or abort while handling: GET "/"`) {
if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) {
t.Errorf("unexpected output captured actual = %v", capturedOutput)
}
}
@ -353,7 +353,7 @@ func TestErrConnKilledHTTP2(t *testing.T) {
if strings.Count(capturedOutput, "\n") != 1 {
t.Errorf("unexpected output captured actual = %v", capturedOutput)
}
if !strings.Contains(capturedOutput, `timeout or abort while handling: GET "/"`) {
if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) {
t.Errorf("unexpected output captured actual = %v", capturedOutput)
}

View File

@ -51,11 +51,11 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolve
// This call can have different handlers, but the default chain rate limits. Call it after the metrics are updated
// in case the rate limit delays it. If you outrun the rate for this one timed out requests, something has gone
// seriously wrong with your server, but generally having a logging signal for timeouts is useful.
runtime.HandleError(fmt.Errorf("timeout or abort while handling: %v %q", req.Method, req.URL.Path))
runtime.HandleError(fmt.Errorf("timeout or abort while handling: method=%v URI=%q audit-ID=%q", req.Method, req.RequestURI, request.GetAuditIDTruncated(req)))
return
}
http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError)
klog.Errorf("apiserver panic'd on %v %v", req.Method, req.RequestURI)
klog.ErrorS(nil, "apiserver panic'd", "method", req.Method, "URI", req.RequestURI, "audit-ID", request.GetAuditIDTruncated(req))
})
}

View File

@ -25,6 +25,7 @@ import (
"runtime"
"time"
"k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/klog/v2"
)
@ -157,12 +158,14 @@ func (rl *respLogger) Addf(format string, data ...interface{}) {
func (rl *respLogger) LogArgs() []interface{} {
latency := time.Since(rl.startTime)
auditID := request.GetAuditIDTruncated(rl.req)
if rl.hijacked {
return []interface{}{
"verb", rl.req.Method,
"URI", rl.req.RequestURI,
"latency", latency,
"userAgent", rl.req.UserAgent(),
"audit-ID", auditID,
"srcIP", rl.req.RemoteAddr,
"hijacked", true,
}
@ -172,6 +175,7 @@ func (rl *respLogger) LogArgs() []interface{} {
"URI", rl.req.RequestURI,
"latency", latency,
"userAgent", rl.req.UserAgent(),
"audit-ID", auditID,
"srcIP", rl.req.RemoteAddr,
"resp", rl.status,
}

View File

@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/util/httpstream/spdy"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/proxy"
auditinternal "k8s.io/apiserver/pkg/apis/audit"
"k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
endpointmetrics "k8s.io/apiserver/pkg/endpoints/metrics"
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
@ -201,6 +202,12 @@ func newRequestForProxy(location *url.URL, req *http.Request) (*http.Request, co
newReq.URL = location
newReq.Host = location.Host
// If the original request has an audit ID, let's make sure we propagate this
// to the aggregated server.
if auditID, found := genericapirequest.AuditIDFrom(req.Context()); found {
newReq.Header.Set(auditinternal.HeaderAuditID, string(auditID))
}
return newReq, cancelFn
}

View File

@ -36,6 +36,7 @@ import (
"golang.org/x/net/websocket"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/proxy"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/authentication/user"
@ -577,6 +578,46 @@ func TestGetContextForNewRequest(t *testing.T) {
}
func TestNewRequestForProxyWithAuditID(t *testing.T) {
tests := []struct {
name string
auditID string
}{
{
name: "original request has Audit-ID",
auditID: "foo-bar",
},
{
name: "original request does not have Audit-ID",
auditID: "",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
req, err := http.NewRequest(http.MethodGet, "/api/group/version/foos/namespace/foo", nil)
if err != nil {
t.Fatalf("failed to create new http request - %v", err)
}
req = req.WithContext(genericapirequest.WithRequestInfo(req.Context(), &genericapirequest.RequestInfo{Path: req.URL.Path}))
if len(test.auditID) > 0 {
req = req.WithContext(genericapirequest.WithAuditID(req.Context(), types.UID(test.auditID)))
}
newReq, _ := newRequestForProxy(req.URL, req)
if newReq == nil {
t.Fatal("expected a non nil Request object")
}
auditIDGot := newReq.Header.Get("Audit-ID")
if test.auditID != auditIDGot {
t.Errorf("expected an Audit-ID value: %q, but got: %q", test.auditID, auditIDGot)
}
})
}
}
// TestProxyCertReload verifies that the proxy reloading of certificates work
// to be able to test the reloading it starts a server with client auth enabled
// it first uses certs that does not match the client CA so the verification fails - expecting HTTP 503