From 271b8cf1c1e3e720558a04147688507ffefff406 Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Sat, 11 Jan 2025 12:40:51 +0530 Subject: [PATCH] kube-proxy healthz handler ip family aware Signed-off-by: Daman Arora Co-authored-by: Antonio Ojea --- pkg/proxy/healthcheck/healthcheck_test.go | 296 ++++++++++++++++++---- pkg/proxy/healthcheck/proxy_health.go | 79 ++++-- pkg/proxy/healthcheck/service_health.go | 5 +- 3 files changed, 318 insertions(+), 62 deletions(-) diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index 9eb92004d58..e096d6fc34a 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -133,19 +133,13 @@ type hcPayload struct { ServiceProxyHealthy bool } -type healthzPayload struct { - LastUpdated string - CurrentTime string - NodeEligible *bool -} - type fakeProxyHealthChecker struct { healthy bool } -func (fake fakeProxyHealthChecker) Health() (bool, time.Time) { +func (fake fakeProxyHealthChecker) Health() ProxyHealth { // we only need "healthy" field for testing service healthchecks. - return fake.healthy, time.Time{} + return ProxyHealth{Healthy: fake.healthy} } func TestServer(t *testing.T) { @@ -481,29 +475,65 @@ func TestHealthzServer(t *testing.T) { tracking503: 0, } - var expectedPayload healthzPayload + var expectedPayload ProxyHealth // testProxyHealthUpdater only asserts on proxy health, without considering node eligibility // defaulting node health to true for testing. testProxyHealthUpdater(hs, hsTest, fakeClock, ptr.To(true), t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(true)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(true), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a ToBeDeletedTaint node hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(false)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(false), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(true)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(true), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a deleted node hs.SyncNode(makeNode(tweakDeleted())) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(false)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(false), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) } @@ -523,29 +553,61 @@ func TestLivezServer(t *testing.T) { tracking503: 0, } - var expectedPayload healthzPayload + var expectedPayload ProxyHealth // /livez doesn't have a concept of "nodeEligible", keeping the value // for node eligibility nil. testProxyHealthUpdater(hs, hsTest, fakeClock, nil, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakDeleted())) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } @@ -557,119 +619,255 @@ var ( ) func testProxyHealthUpdater(hs *ProxyHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, nodeEligible *bool, t *testing.T) { - var expectedPayload healthzPayload + var expectedPayload ProxyHealth // lastUpdated is used to track the time whenever any of the proxier update is simulated, // is used in assertion of the http response. var lastUpdated time.Time + var ipv4LastUpdated time.Time + var ipv6LastUpdated time.Time // Should return 200 "OK" by default. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" after first update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should continue to return 200 "OK" as long as no further updates are queued for any proxier. fakeClock.Step(25 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv4 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv6 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if both IPv4 and IPv6 proxiers exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If IPv6 proxier is late for an update but IPv4 proxier is not then updating IPv4 proxier should have no effect. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If both IPv4 and IPv6 proxiers are late for an update, we shouldn't report 200 "OK" until after both of them update. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) - lastUpdated = fakeClock.Now() - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} - testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) - lastUpdated = fakeClock.Now() + ipv6LastUpdated = fakeClock.Now() // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } -func testHTTPHandler(hsTest *serverTest, status int, expectedPayload healthzPayload, t *testing.T) { +func testHTTPHandler(hsTest *serverTest, status int, expectedPayload ProxyHealth, t *testing.T) { t.Helper() handler := hsTest.server.(*fakeHTTPServer).handler req, err := http.NewRequest("GET", string(hsTest.url), nil) @@ -683,17 +881,29 @@ func testHTTPHandler(hsTest *serverTest, status int, expectedPayload healthzPayl if resp.Code != status { t.Errorf("expected status code %v, got %v", status, resp.Code) } - var payload healthzPayload + var payload ProxyHealth if err := json.Unmarshal(resp.Body.Bytes(), &payload); err != nil { t.Fatal(err) } // assert on payload - if payload.LastUpdated != expectedPayload.LastUpdated { - t.Errorf("expected last updated: %s; got: %s", expectedPayload.LastUpdated, payload.LastUpdated) + if !payload.LastUpdated.Equal(expectedPayload.LastUpdated) { + t.Errorf("expected last updated: %s; got: %s", expectedPayload.LastUpdated.String(), payload.LastUpdated.String()) } - if payload.CurrentTime != expectedPayload.CurrentTime { - t.Errorf("expected current time: %s; got: %s", expectedPayload.CurrentTime, payload.CurrentTime) + if !payload.CurrentTime.Equal(expectedPayload.CurrentTime) { + t.Errorf("expected current time: %s; got: %s", expectedPayload.CurrentTime.String(), payload.CurrentTime.String()) + } + if payload.Healthy != expectedPayload.Healthy { + t.Errorf("expected healthy: %v, got: %v", expectedPayload.Healthy, payload.Healthy) + } + // assert on individual proxier response + for ipFamily := range payload.Status { + if payload.Status[ipFamily].Healthy != expectedPayload.Status[ipFamily].Healthy { + t.Errorf("expected healthy[%s]: %v, got: %v", ipFamily, expectedPayload.Status[ipFamily].Healthy, payload.Status[ipFamily].Healthy) + } + if !payload.Status[ipFamily].LastUpdated.Equal(expectedPayload.Status[ipFamily].LastUpdated) { + t.Errorf("expected last updated[%s]: %s; got: %s", ipFamily, expectedPayload.Status[ipFamily].LastUpdated.String(), payload.Status[ipFamily].LastUpdated.String()) + } } if status == http.StatusOK { diff --git a/pkg/proxy/healthcheck/proxy_health.go b/pkg/proxy/healthcheck/proxy_health.go index 2039f801954..9eec48833a9 100644 --- a/pkg/proxy/healthcheck/proxy_health.go +++ b/pkg/proxy/healthcheck/proxy_health.go @@ -18,6 +18,7 @@ package healthcheck import ( "context" + "encoding/json" "fmt" "net/http" "sync" @@ -27,6 +28,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/proxy/metrics" "k8s.io/utils/clock" + "k8s.io/utils/ptr" ) const ( @@ -35,6 +37,27 @@ const ( ToBeDeletedTaint = "ToBeDeletedByClusterAutoscaler" ) +// ProxierHealth represents the health of a proxier which operates on a single IP family. +type ProxierHealth struct { + LastUpdated time.Time `json:"lastUpdated"` + Healthy bool `json:"healthy"` +} + +// ProxyHealth represents the health of kube-proxy, embeds health of individual proxiers. +type ProxyHealth struct { + // LastUpdated is the last updated time of the proxier + // which was updated most recently. + // This is kept for backward-compatibility. + LastUpdated time.Time `json:"lastUpdated"` + CurrentTime time.Time `json:"currentTime"` + NodeEligible *bool `json:"nodeEligible,omitempty"` + // Healthy is true when all the proxiers are healthy, + // false otherwise. + Healthy bool `json:"healthy"` + // status of the health check per IP family + Status map[v1.IPFamily]ProxierHealth `json:"status,omitempty"` +} + // ProxyHealthServer allows callers to: // 1. run a http server with /healthz and /livez endpoint handlers. // 2. update healthz timestamps before and after synchronizing dataplane. @@ -101,20 +124,32 @@ func (hs *ProxyHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { } } -// Health returns only the proxier's health state and last updated time. -func (hs *ProxyHealthServer) Health() (bool, time.Time) { +// Health returns proxy health status. +func (hs *ProxyHealthServer) Health() ProxyHealth { + var health = ProxyHealth{ + Healthy: true, + Status: make(map[v1.IPFamily]ProxierHealth), + } hs.lock.RLock() defer hs.lock.RUnlock() var lastUpdated time.Time - for _, proxierLastUpdated := range hs.lastUpdatedMap { + for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { if proxierLastUpdated.After(lastUpdated) { lastUpdated = proxierLastUpdated } + // initialize the health status of each proxier + // with healthy=true and the last updated time + // of the proxier. + health.Status[ipFamily] = ProxierHealth{ + LastUpdated: proxierLastUpdated, + Healthy: true, + } } currentTime := hs.clock.Now() - for ipFamily, _ := range hs.lastUpdatedMap { + health.CurrentTime = currentTime + for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { if _, set := hs.oldestPendingQueuedMap[ipFamily]; !set { // the proxier is healthy while it's starting up // or the proxier is fully synced. @@ -125,9 +160,16 @@ func (hs *ProxyHealthServer) Health() (bool, time.Time) { // there's an unprocessed update queued for this proxier, but it's not late yet. continue } - return false, lastUpdated + + // mark the status unhealthy. + health.Healthy = false + health.Status[ipFamily] = ProxierHealth{ + LastUpdated: proxierLastUpdated, + Healthy: false, + } } - return true, lastUpdated + health.LastUpdated = lastUpdated + return health } // SyncNode syncs the node and determines if it is eligible or not. Eligible is @@ -181,11 +223,13 @@ type healthzHandler struct { } func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { + health := h.hs.Health() nodeEligible := h.hs.NodeEligible() - healthy, lastUpdated := h.hs.Health() - currentTime := h.hs.clock.Now() + healthy := health.Healthy && nodeEligible + // updating the node eligibility here (outside of Health() call) as we only want responses + // of /healthz calls (not /livez) to have that. + health.NodeEligible = ptr.To(nodeEligible) - healthy = healthy && nodeEligible resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") if !healthy { @@ -199,9 +243,11 @@ func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { // preserve compatibility, we use the same semantics: the returned // lastUpdated value is "recent" if the server is healthy. The kube-proxy // metrics provide more detailed information. - lastUpdated = currentTime + health.LastUpdated = h.hs.clock.Now() } - fmt.Fprintf(resp, `{"lastUpdated": %q,"currentTime": %q, "nodeEligible": %v}`, lastUpdated, currentTime, nodeEligible) + + output, _ := json.Marshal(health) + _, _ = fmt.Fprint(resp, string(output)) } type livezHandler struct { @@ -209,11 +255,11 @@ type livezHandler struct { } func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { - healthy, lastUpdated := h.hs.Health() - currentTime := h.hs.clock.Now() + health := h.hs.Health() + resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") - if !healthy { + if !health.Healthy { metrics.ProxyLivezTotal.WithLabelValues("503").Inc() resp.WriteHeader(http.StatusServiceUnavailable) } else { @@ -224,7 +270,8 @@ func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { // preserve compatibility, we use the same semantics: the returned // lastUpdated value is "recent" if the server is healthy. The kube-proxy // metrics provide more detailed information. - lastUpdated = currentTime + health.LastUpdated = h.hs.clock.Now() } - fmt.Fprintf(resp, `{"lastUpdated": %q,"currentTime": %q}`, lastUpdated, currentTime) + output, _ := json.Marshal(health) + _, _ = fmt.Fprint(resp, string(output)) } diff --git a/pkg/proxy/healthcheck/service_health.go b/pkg/proxy/healthcheck/service_health.go index 0f9eb300f00..37a4ed7b6fa 100644 --- a/pkg/proxy/healthcheck/service_health.go +++ b/pkg/proxy/healthcheck/service_health.go @@ -24,7 +24,6 @@ import ( "strconv" "strings" "sync" - "time" "github.com/lithammer/dedent" @@ -55,7 +54,7 @@ type ServiceHealthServer interface { type proxyHealthChecker interface { // Health returns the proxy's health state and last updated time. - Health() (bool, time.Time) + Health() ProxyHealth } func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { @@ -231,7 +230,7 @@ func (h hcHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { } count := svc.endpoints h.hcs.lock.RUnlock() - kubeProxyHealthy, _ := h.hcs.healthzServer.Health() + kubeProxyHealthy := h.hcs.healthzServer.Health().Healthy resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff")