diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index af9dd5101fe..4b5ab46a89e 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -159,7 +159,7 @@ type ProxyServer struct { Broadcaster events.EventBroadcaster Recorder events.EventRecorder NodeRef *v1.ObjectReference - HealthzServer *healthcheck.ProxierHealthServer + HealthzServer *healthcheck.ProxyHealthServer Hostname string PrimaryIPFamily v1.IPFamily NodeIPs map[v1.IPFamily]net.IP @@ -224,7 +224,7 @@ func newProxyServer(ctx context.Context, config *kubeproxyconfig.KubeProxyConfig } if len(config.HealthzBindAddress) > 0 { - s.HealthzServer = healthcheck.NewProxierHealthServer(config.HealthzBindAddress, 2*config.SyncPeriod.Duration) + s.HealthzServer = healthcheck.NewProxyHealthServer(config.HealthzBindAddress, 2*config.SyncPeriod.Duration) } err = s.platformSetup(ctx) @@ -415,7 +415,7 @@ func createClient(ctx context.Context, config componentbaseconfig.ClientConnecti return client, nil } -func serveHealthz(ctx context.Context, hz *healthcheck.ProxierHealthServer, errCh chan error) { +func serveHealthz(ctx context.Context, hz *healthcheck.ProxyHealthServer, errCh chan error) { logger := klog.FromContext(ctx) if hz == nil { return diff --git a/pkg/proxy/healthcheck/common.go b/pkg/proxy/healthcheck/common.go index c7a9f8bfbe0..5e026484665 100644 --- a/pkg/proxy/healthcheck/common.go +++ b/pkg/proxy/healthcheck/common.go @@ -24,21 +24,21 @@ import ( netutils "k8s.io/utils/net" ) -// listener allows for testing of ServiceHealthServer and ProxierHealthServer. +// listener allows for testing of ServiceHealthServer and ProxyHealthServer. type listener interface { // Listen is very much like netutils.MultiListen, except the second arg (network) is // fixed to be "tcp". Listen(ctx context.Context, addrs ...string) (net.Listener, error) } -// httpServerFactory allows for testing of ServiceHealthServer and ProxierHealthServer. +// httpServerFactory allows for testing of ServiceHealthServer and ProxyHealthServer. type httpServerFactory interface { // New creates an instance of a type satisfying HTTPServer. This is // designed to include http.Server. New(handler http.Handler) httpServer } -// httpServer allows for testing of ServiceHealthServer and ProxierHealthServer. +// httpServer allows for testing of ServiceHealthServer and ProxyHealthServer. // It is designed so that http.Server satisfies this interface, type httpServer interface { Serve(listener net.Listener) error diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index 777f529897e..e096d6fc34a 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/dump" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/utils/ptr" basemetrics "k8s.io/component-base/metrics" "k8s.io/kubernetes/pkg/proxy/metrics" @@ -132,25 +133,20 @@ type hcPayload struct { ServiceProxyHealthy bool } -type healthzPayload struct { - LastUpdated string - CurrentTime string - NodeHealthy bool -} - -type fakeProxierHealthChecker struct { +type fakeProxyHealthChecker struct { healthy bool } -func (fake fakeProxierHealthChecker) IsHealthy() bool { - return fake.healthy +func (fake fakeProxyHealthChecker) Health() ProxyHealth { + // we only need "healthy" field for testing service healthchecks. + return ProxyHealth{Healthy: fake.healthy} } func TestServer(t *testing.T) { listener := newFakeListener() httpFactory := newFakeHTTPServerFactory() nodePortAddresses := proxyutil.NewNodePortAddresses(v1.IPv4Protocol, []string{}) - proxyChecker := &fakeProxierHealthChecker{true} + proxyChecker := &fakeProxyHealthChecker{true} hcsi := newServiceHealthServer("hostname", nil, listener, httpFactory, nodePortAddresses, proxyChecker) hcs := hcsi.(*server) @@ -469,7 +465,7 @@ func TestHealthzServer(t *testing.T) { httpFactory := newFakeHTTPServerFactory() fakeClock := testingclock.NewFakeClock(time.Now()) - hs := newProxierHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) + hs := newProxyHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) server := hs.httpFactory.New(healthzHandler{hs: hs}) hsTest := &serverTest{ @@ -479,23 +475,66 @@ func TestHealthzServer(t *testing.T) { tracking503: 0, } - testProxierHealthUpdater(hs, hsTest, fakeClock, t) + var expectedPayload ProxyHealth + // testProxyHealthUpdater only asserts on proxy health, without considering node eligibility + // defaulting node health to true for testing. + testProxyHealthUpdater(hs, hsTest, fakeClock, ptr.To(true), t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(true), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a ToBeDeletedTaint node hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(false), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(true), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a deleted node hs.SyncNode(makeNode(tweakDeleted())) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(false), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) } func TestLivezServer(t *testing.T) { @@ -504,7 +543,7 @@ func TestLivezServer(t *testing.T) { httpFactory := newFakeHTTPServerFactory() fakeClock := testingclock.NewFakeClock(time.Now()) - hs := newProxierHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) + hs := newProxyHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) server := hs.httpFactory.New(livezHandler{hs: hs}) hsTest := &serverTest{ @@ -514,23 +553,62 @@ func TestLivezServer(t *testing.T) { tracking503: 0, } - testProxierHealthUpdater(hs, hsTest, fakeClock, t) + var expectedPayload ProxyHealth + // /livez doesn't have a concept of "nodeEligible", keeping the value + // for node eligibility nil. + testProxyHealthUpdater(hs, hsTest, fakeClock, nil, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakDeleted())) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } type url string @@ -540,78 +618,257 @@ var ( livezURL url = "/livez" ) -func testProxierHealthUpdater(hs *ProxierHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, t *testing.T) { +func testProxyHealthUpdater(hs *ProxyHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, nodeEligible *bool, t *testing.T) { + var expectedPayload ProxyHealth + // lastUpdated is used to track the time whenever any of the proxier update is simulated, + // is used in assertion of the http response. + var lastUpdated time.Time + var ipv4LastUpdated time.Time + var ipv6LastUpdated time.Time + // Should return 200 "OK" by default. - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" after first update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) - testHTTPHandler(hsTest, http.StatusOK, t) + ipv6LastUpdated = fakeClock.Now() + lastUpdated = fakeClock.Now() + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should continue to return 200 "OK" as long as no further updates are queued for any proxier. fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv4 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() + lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv6 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() + lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if both IPv4 and IPv6 proxiers exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() + lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If IPv6 proxier is late for an update but IPv4 proxier is not then updating IPv4 proxier should have no effect. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + ipv4LastUpdated = fakeClock.Now() + lastUpdated = fakeClock.Now() + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv6Protocol) - testHTTPHandler(hsTest, http.StatusOK, t) + ipv6LastUpdated = fakeClock.Now() + lastUpdated = fakeClock.Now() + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If both IPv4 and IPv6 proxiers are late for an update, we shouldn't report 200 "OK" until after both of them update. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) - testHTTPHandler(hsTest, http.StatusOK, t) + ipv6LastUpdated = fakeClock.Now() + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } -func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) { +func testHTTPHandler(hsTest *serverTest, status int, expectedPayload ProxyHealth, t *testing.T) { + t.Helper() handler := hsTest.server.(*fakeHTTPServer).handler req, err := http.NewRequest("GET", string(hsTest.url), nil) if err != nil { @@ -624,11 +881,31 @@ func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) { if resp.Code != status { t.Errorf("expected status code %v, got %v", status, resp.Code) } - var payload healthzPayload + var payload ProxyHealth if err := json.Unmarshal(resp.Body.Bytes(), &payload); err != nil { t.Fatal(err) } + // assert on payload + if !payload.LastUpdated.Equal(expectedPayload.LastUpdated) { + t.Errorf("expected last updated: %s; got: %s", expectedPayload.LastUpdated.String(), payload.LastUpdated.String()) + } + if !payload.CurrentTime.Equal(expectedPayload.CurrentTime) { + t.Errorf("expected current time: %s; got: %s", expectedPayload.CurrentTime.String(), payload.CurrentTime.String()) + } + if payload.Healthy != expectedPayload.Healthy { + t.Errorf("expected healthy: %v, got: %v", expectedPayload.Healthy, payload.Healthy) + } + // assert on individual proxier response + for ipFamily := range payload.Status { + if payload.Status[ipFamily].Healthy != expectedPayload.Status[ipFamily].Healthy { + t.Errorf("expected healthy[%s]: %v, got: %v", ipFamily, expectedPayload.Status[ipFamily].Healthy, payload.Status[ipFamily].Healthy) + } + if !payload.Status[ipFamily].LastUpdated.Equal(expectedPayload.Status[ipFamily].LastUpdated) { + t.Errorf("expected last updated[%s]: %s; got: %s", ipFamily, expectedPayload.Status[ipFamily].LastUpdated.String(), payload.Status[ipFamily].LastUpdated.String()) + } + } + if status == http.StatusOK { hsTest.tracking200++ } @@ -636,10 +913,16 @@ func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) { hsTest.tracking503++ } if hsTest.url == healthzURL { + if *payload.NodeEligible != *expectedPayload.NodeEligible { + t.Errorf("expected node eligible: %v; got: %v", *payload.NodeEligible, *expectedPayload.NodeEligible) + } testMetricEquals(metrics.ProxyHealthzTotal.WithLabelValues("200"), float64(hsTest.tracking200), t) testMetricEquals(metrics.ProxyHealthzTotal.WithLabelValues("503"), float64(hsTest.tracking503), t) } if hsTest.url == livezURL { + if payload.NodeEligible != nil { + t.Errorf("expected node eligible nil for %s response", livezURL) + } testMetricEquals(metrics.ProxyLivezTotal.WithLabelValues("200"), float64(hsTest.tracking200), t) testMetricEquals(metrics.ProxyLivezTotal.WithLabelValues("503"), float64(hsTest.tracking503), t) } @@ -659,7 +942,7 @@ func testMetricEquals(metric basemetrics.CounterMetric, expected float64, t *tes func TestServerWithSelectiveListeningAddress(t *testing.T) { listener := newFakeListener() httpFactory := newFakeHTTPServerFactory() - proxyChecker := &fakeProxierHealthChecker{true} + proxyChecker := &fakeProxyHealthChecker{true} // limiting addresses to loop back. We don't want any cleverness here around getting IP for // machine nor testing ipv6 || ipv4. using loop back guarantees the test will work on any machine diff --git a/pkg/proxy/healthcheck/proxier_health.go b/pkg/proxy/healthcheck/proxy_health.go similarity index 65% rename from pkg/proxy/healthcheck/proxier_health.go rename to pkg/proxy/healthcheck/proxy_health.go index a76437189bd..9eec48833a9 100644 --- a/pkg/proxy/healthcheck/proxier_health.go +++ b/pkg/proxy/healthcheck/proxy_health.go @@ -18,6 +18,7 @@ package healthcheck import ( "context" + "encoding/json" "fmt" "net/http" "sync" @@ -27,6 +28,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/proxy/metrics" "k8s.io/utils/clock" + "k8s.io/utils/ptr" ) const ( @@ -35,14 +37,35 @@ const ( ToBeDeletedTaint = "ToBeDeletedByClusterAutoscaler" ) -// ProxierHealthServer allows callers to: +// ProxierHealth represents the health of a proxier which operates on a single IP family. +type ProxierHealth struct { + LastUpdated time.Time `json:"lastUpdated"` + Healthy bool `json:"healthy"` +} + +// ProxyHealth represents the health of kube-proxy, embeds health of individual proxiers. +type ProxyHealth struct { + // LastUpdated is the last updated time of the proxier + // which was updated most recently. + // This is kept for backward-compatibility. + LastUpdated time.Time `json:"lastUpdated"` + CurrentTime time.Time `json:"currentTime"` + NodeEligible *bool `json:"nodeEligible,omitempty"` + // Healthy is true when all the proxiers are healthy, + // false otherwise. + Healthy bool `json:"healthy"` + // status of the health check per IP family + Status map[v1.IPFamily]ProxierHealth `json:"status,omitempty"` +} + +// ProxyHealthServer allows callers to: // 1. run a http server with /healthz and /livez endpoint handlers. // 2. update healthz timestamps before and after synchronizing dataplane. // 3. sync node status, for reporting unhealthy /healthz response // if the node is marked for deletion by autoscaler. // 4. get proxy health by verifying that the delay between QueuedUpdate() // calls and Updated() calls exceeded healthTimeout or not. -type ProxierHealthServer struct { +type ProxyHealthServer struct { listener listener httpFactory httpServerFactory clock clock.Clock @@ -56,13 +79,13 @@ type ProxierHealthServer struct { nodeEligible bool } -// NewProxierHealthServer returns a proxier health http server. -func NewProxierHealthServer(addr string, healthTimeout time.Duration) *ProxierHealthServer { - return newProxierHealthServer(stdNetListener{}, stdHTTPServerFactory{}, clock.RealClock{}, addr, healthTimeout) +// NewProxyHealthServer returns a proxy health http server. +func NewProxyHealthServer(addr string, healthTimeout time.Duration) *ProxyHealthServer { + return newProxyHealthServer(stdNetListener{}, stdHTTPServerFactory{}, clock.RealClock{}, addr, healthTimeout) } -func newProxierHealthServer(listener listener, httpServerFactory httpServerFactory, c clock.Clock, addr string, healthTimeout time.Duration) *ProxierHealthServer { - return &ProxierHealthServer{ +func newProxyHealthServer(listener listener, httpServerFactory httpServerFactory, c clock.Clock, addr string, healthTimeout time.Duration) *ProxyHealthServer { + return &ProxyHealthServer{ listener: listener, httpFactory: httpServerFactory, clock: c, @@ -80,7 +103,7 @@ func newProxierHealthServer(listener listener, httpServerFactory httpServerFacto // Updated should be called when the proxier of the given IP family has successfully updated // the service rules to reflect the current state and should be considered healthy now. -func (hs *ProxierHealthServer) Updated(ipFamily v1.IPFamily) { +func (hs *ProxyHealthServer) Updated(ipFamily v1.IPFamily) { hs.lock.Lock() defer hs.lock.Unlock() delete(hs.oldestPendingQueuedMap, ipFamily) @@ -92,7 +115,7 @@ func (hs *ProxierHealthServer) Updated(ipFamily v1.IPFamily) { // indicates that the proxier for the given IP family has received changes but has not // yet pushed them to its backend. If the proxier does not call Updated within the // healthTimeout time then it will be considered unhealthy. -func (hs *ProxierHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { +func (hs *ProxyHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { hs.lock.Lock() defer hs.lock.Unlock() // Set oldestPendingQueuedMap[ipFamily] only if it's currently unset @@ -101,26 +124,32 @@ func (hs *ProxierHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { } } -// IsHealthy returns only the proxier's health state, following the same -// definition the HTTP server defines, but ignoring the state of the Node. -func (hs *ProxierHealthServer) IsHealthy() bool { - isHealthy, _ := hs.isHealthy() - return isHealthy -} - -func (hs *ProxierHealthServer) isHealthy() (bool, time.Time) { +// Health returns proxy health status. +func (hs *ProxyHealthServer) Health() ProxyHealth { + var health = ProxyHealth{ + Healthy: true, + Status: make(map[v1.IPFamily]ProxierHealth), + } hs.lock.RLock() defer hs.lock.RUnlock() var lastUpdated time.Time - currentTime := hs.clock.Now() - for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { - if proxierLastUpdated.After(lastUpdated) { lastUpdated = proxierLastUpdated } + // initialize the health status of each proxier + // with healthy=true and the last updated time + // of the proxier. + health.Status[ipFamily] = ProxierHealth{ + LastUpdated: proxierLastUpdated, + Healthy: true, + } + } + currentTime := hs.clock.Now() + health.CurrentTime = currentTime + for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { if _, set := hs.oldestPendingQueuedMap[ipFamily]; !set { // the proxier is healthy while it's starting up // or the proxier is fully synced. @@ -131,14 +160,21 @@ func (hs *ProxierHealthServer) isHealthy() (bool, time.Time) { // there's an unprocessed update queued for this proxier, but it's not late yet. continue } - return false, proxierLastUpdated + + // mark the status unhealthy. + health.Healthy = false + health.Status[ipFamily] = ProxierHealth{ + LastUpdated: proxierLastUpdated, + Healthy: false, + } } - return true, lastUpdated + health.LastUpdated = lastUpdated + return health } // SyncNode syncs the node and determines if it is eligible or not. Eligible is // defined as being: not tainted by ToBeDeletedTaint and not deleted. -func (hs *ProxierHealthServer) SyncNode(node *v1.Node) { +func (hs *ProxyHealthServer) SyncNode(node *v1.Node) { hs.lock.Lock() defer hs.lock.Unlock() @@ -155,15 +191,15 @@ func (hs *ProxierHealthServer) SyncNode(node *v1.Node) { hs.nodeEligible = true } -// NodeEligible returns nodeEligible field of ProxierHealthServer. -func (hs *ProxierHealthServer) NodeEligible() bool { +// NodeEligible returns nodeEligible field of ProxyHealthServer. +func (hs *ProxyHealthServer) NodeEligible() bool { hs.lock.RLock() defer hs.lock.RUnlock() return hs.nodeEligible } // Run starts the healthz HTTP server and blocks until it exits. -func (hs *ProxierHealthServer) Run(ctx context.Context) error { +func (hs *ProxyHealthServer) Run(ctx context.Context) error { serveMux := http.NewServeMux() serveMux.Handle("/healthz", healthzHandler{hs: hs}) serveMux.Handle("/livez", livezHandler{hs: hs}) @@ -171,27 +207,29 @@ func (hs *ProxierHealthServer) Run(ctx context.Context) error { listener, err := hs.listener.Listen(ctx, hs.addr) if err != nil { - return fmt.Errorf("failed to start proxier healthz on %s: %v", hs.addr, err) + return fmt.Errorf("failed to start proxy healthz on %s: %w", hs.addr, err) } klog.V(3).InfoS("Starting healthz HTTP server", "address", hs.addr) if err := server.Serve(listener); err != nil { - return fmt.Errorf("proxier healthz closed with error: %v", err) + return fmt.Errorf("proxy healthz closed with error: %w", err) } return nil } type healthzHandler struct { - hs *ProxierHealthServer + hs *ProxyHealthServer } -func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { +func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { + health := h.hs.Health() nodeEligible := h.hs.NodeEligible() - healthy, lastUpdated := h.hs.isHealthy() - currentTime := h.hs.clock.Now() + healthy := health.Healthy && nodeEligible + // updating the node eligibility here (outside of Health() call) as we only want responses + // of /healthz calls (not /livez) to have that. + health.NodeEligible = ptr.To(nodeEligible) - healthy = healthy && nodeEligible resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") if !healthy { @@ -205,21 +243,23 @@ func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { // preserve compatibility, we use the same semantics: the returned // lastUpdated value is "recent" if the server is healthy. The kube-proxy // metrics provide more detailed information. - lastUpdated = currentTime + health.LastUpdated = h.hs.clock.Now() } - fmt.Fprintf(resp, `{"lastUpdated": %q,"currentTime": %q, "nodeEligible": %v}`, lastUpdated, currentTime, nodeEligible) + + output, _ := json.Marshal(health) + _, _ = fmt.Fprint(resp, string(output)) } type livezHandler struct { - hs *ProxierHealthServer + hs *ProxyHealthServer } func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { - healthy, lastUpdated := h.hs.isHealthy() - currentTime := h.hs.clock.Now() + health := h.hs.Health() + resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") - if !healthy { + if !health.Healthy { metrics.ProxyLivezTotal.WithLabelValues("503").Inc() resp.WriteHeader(http.StatusServiceUnavailable) } else { @@ -230,7 +270,8 @@ func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { // preserve compatibility, we use the same semantics: the returned // lastUpdated value is "recent" if the server is healthy. The kube-proxy // metrics provide more detailed information. - lastUpdated = currentTime + health.LastUpdated = h.hs.clock.Now() } - fmt.Fprintf(resp, `{"lastUpdated": %q,"currentTime": %q}`, lastUpdated, currentTime) + output, _ := json.Marshal(health) + _, _ = fmt.Fprint(resp, string(output)) } diff --git a/pkg/proxy/healthcheck/service_health.go b/pkg/proxy/healthcheck/service_health.go index 44eb8b687cf..37a4ed7b6fa 100644 --- a/pkg/proxy/healthcheck/service_health.go +++ b/pkg/proxy/healthcheck/service_health.go @@ -52,13 +52,12 @@ type ServiceHealthServer interface { SyncEndpoints(newEndpoints map[types.NamespacedName]int) error } -type proxierHealthChecker interface { - // IsHealthy returns the proxier's health state, following the same - // definition the HTTP server defines. - IsHealthy() bool +type proxyHealthChecker interface { + // Health returns the proxy's health state and last updated time. + Health() ProxyHealth } -func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxierHealthChecker) ServiceHealthServer { +func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { // It doesn't matter whether we listen on "0.0.0.0", "::", or ""; go // treats them all the same. nodeIPs := []net.IP{net.IPv4zero} @@ -84,7 +83,7 @@ func newServiceHealthServer(hostname string, recorder events.EventRecorder, list } // NewServiceHealthServer allocates a new service healthcheck server manager -func NewServiceHealthServer(hostname string, recorder events.EventRecorder, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxierHealthChecker) ServiceHealthServer { +func NewServiceHealthServer(hostname string, recorder events.EventRecorder, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { return newServiceHealthServer(hostname, recorder, stdNetListener{}, stdHTTPServerFactory{}, nodePortAddresses, healthzServer) } @@ -96,7 +95,7 @@ type server struct { listener listener httpFactory httpServerFactory - healthzServer proxierHealthChecker + healthzServer proxyHealthChecker lock sync.RWMutex services map[types.NamespacedName]*hcInstance @@ -231,7 +230,7 @@ func (h hcHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { } count := svc.endpoints h.hcs.lock.RUnlock() - kubeProxyHealthy := h.hcs.healthzServer.IsHealthy() + kubeProxyHealthy := h.hcs.healthzServer.Health().Healthy resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 5649a88b936..9c9e5f0c5cd 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -111,7 +111,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddresses []string, initOnly bool, ) (proxy.Provider, error) { @@ -178,7 +178,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer // Since converting probabilities (floats) to strings is expensive // and we are using only probabilities in the format of 1/n, we are @@ -240,7 +240,7 @@ func NewProxier(ctx context.Context, hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddressStrings []string, initOnly bool, ) (*Proxier, error) { diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index b3d0dd4f81b..11ec8c8d0d5 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -130,7 +130,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, scheduler string, nodePortAddresses []string, initOnly bool, @@ -212,7 +212,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer ipvsScheduler string // The following buffers are used to reuse memory and avoid allocations @@ -285,7 +285,7 @@ func NewProxier( hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, scheduler string, nodePortAddressStrings []string, initOnly bool, diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index 16a78dba78f..f6907cd2f64 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -116,7 +116,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddresses []string, initOnly bool, ) (proxy.Provider, error) { @@ -181,7 +181,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer // nodePortAddresses selects the interfaces where nodePort works. nodePortAddresses *proxyutil.NodePortAddresses @@ -223,7 +223,7 @@ func NewProxier(ctx context.Context, hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddressStrings []string, initOnly bool, ) (*Proxier, error) { diff --git a/pkg/proxy/node.go b/pkg/proxy/node.go index 0de896710c4..db92508fb4b 100644 --- a/pkg/proxy/node.go +++ b/pkg/proxy/node.go @@ -93,7 +93,7 @@ func (n *NodePodCIDRHandler) OnNodeSynced() {} // NodeEligibleHandler handles the life cycle of the Node's eligibility, as // determined by the health server for directing load balancer traffic. type NodeEligibleHandler struct { - HealthServer *healthcheck.ProxierHealthServer + HealthServer *healthcheck.ProxyHealthServer } var _ config.NodeHandler = &NodeEligibleHandler{} diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go index fbd3562e6fa..6b0c9de13f7 100644 --- a/pkg/proxy/winkernel/proxier.go +++ b/pkg/proxy/winkernel/proxier.go @@ -620,7 +620,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer hns HostNetworkService hcn HcnService @@ -676,7 +676,7 @@ func NewProxier( hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, healthzBindAddress string, config config.KubeProxyWinkernelConfiguration, ) (*Proxier, error) { @@ -813,7 +813,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, healthzBindAddress string, config config.KubeProxyWinkernelConfiguration, ) (proxy.Provider, error) {