From d6c575532ab305a242786f2cf83c49e2ae130c89 Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Wed, 18 Dec 2024 18:00:44 +0530 Subject: [PATCH 1/6] pkg/proxy/healthcheck: rename 'proxier' to 'proxy' KubeProxy operates with a single health server and two proxies, one for each IP family. The use of the term 'proxier' in the types and functions within pkg/proxy/healthcheck can be misleading, as it may suggest the existence of two health servers, one for each IP family. Signed-off-by: Daman Arora --- cmd/kube-proxy/app/server.go | 6 ++-- pkg/proxy/healthcheck/common.go | 6 ++-- pkg/proxy/healthcheck/healthcheck_test.go | 18 +++++------ pkg/proxy/healthcheck/proxier_health.go | 38 +++++++++++------------ pkg/proxy/healthcheck/service_health.go | 10 +++--- pkg/proxy/iptables/proxier.go | 6 ++-- pkg/proxy/ipvs/proxier.go | 6 ++-- pkg/proxy/nftables/proxier.go | 6 ++-- pkg/proxy/node.go | 2 +- pkg/proxy/winkernel/proxier.go | 6 ++-- 10 files changed, 52 insertions(+), 52 deletions(-) diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index 1c6492633f4..4f8ff6cda1c 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -159,7 +159,7 @@ type ProxyServer struct { Broadcaster events.EventBroadcaster Recorder events.EventRecorder NodeRef *v1.ObjectReference - HealthzServer *healthcheck.ProxierHealthServer + HealthzServer *healthcheck.ProxyHealthServer Hostname string PrimaryIPFamily v1.IPFamily NodeIPs map[v1.IPFamily]net.IP @@ -224,7 +224,7 @@ func newProxyServer(ctx context.Context, config *kubeproxyconfig.KubeProxyConfig } if len(config.HealthzBindAddress) > 0 { - s.HealthzServer = healthcheck.NewProxierHealthServer(config.HealthzBindAddress, 2*config.SyncPeriod.Duration) + s.HealthzServer = healthcheck.NewProxyHealthServer(config.HealthzBindAddress, 2*config.SyncPeriod.Duration) } err = s.platformSetup(ctx) @@ -415,7 +415,7 @@ func createClient(ctx context.Context, config componentbaseconfig.ClientConnecti return client, nil } -func serveHealthz(ctx context.Context, hz *healthcheck.ProxierHealthServer, errCh chan error) { +func serveHealthz(ctx context.Context, hz *healthcheck.ProxyHealthServer, errCh chan error) { logger := klog.FromContext(ctx) if hz == nil { return diff --git a/pkg/proxy/healthcheck/common.go b/pkg/proxy/healthcheck/common.go index c7a9f8bfbe0..5e026484665 100644 --- a/pkg/proxy/healthcheck/common.go +++ b/pkg/proxy/healthcheck/common.go @@ -24,21 +24,21 @@ import ( netutils "k8s.io/utils/net" ) -// listener allows for testing of ServiceHealthServer and ProxierHealthServer. +// listener allows for testing of ServiceHealthServer and ProxyHealthServer. type listener interface { // Listen is very much like netutils.MultiListen, except the second arg (network) is // fixed to be "tcp". Listen(ctx context.Context, addrs ...string) (net.Listener, error) } -// httpServerFactory allows for testing of ServiceHealthServer and ProxierHealthServer. +// httpServerFactory allows for testing of ServiceHealthServer and ProxyHealthServer. type httpServerFactory interface { // New creates an instance of a type satisfying HTTPServer. This is // designed to include http.Server. New(handler http.Handler) httpServer } -// httpServer allows for testing of ServiceHealthServer and ProxierHealthServer. +// httpServer allows for testing of ServiceHealthServer and ProxyHealthServer. // It is designed so that http.Server satisfies this interface, type httpServer interface { Serve(listener net.Listener) error diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index 777f529897e..d1ff3e78cef 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -138,11 +138,11 @@ type healthzPayload struct { NodeHealthy bool } -type fakeProxierHealthChecker struct { +type fakeProxyHealthChecker struct { healthy bool } -func (fake fakeProxierHealthChecker) IsHealthy() bool { +func (fake fakeProxyHealthChecker) IsHealthy() bool { return fake.healthy } @@ -150,7 +150,7 @@ func TestServer(t *testing.T) { listener := newFakeListener() httpFactory := newFakeHTTPServerFactory() nodePortAddresses := proxyutil.NewNodePortAddresses(v1.IPv4Protocol, []string{}) - proxyChecker := &fakeProxierHealthChecker{true} + proxyChecker := &fakeProxyHealthChecker{true} hcsi := newServiceHealthServer("hostname", nil, listener, httpFactory, nodePortAddresses, proxyChecker) hcs := hcsi.(*server) @@ -469,7 +469,7 @@ func TestHealthzServer(t *testing.T) { httpFactory := newFakeHTTPServerFactory() fakeClock := testingclock.NewFakeClock(time.Now()) - hs := newProxierHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) + hs := newProxyHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) server := hs.httpFactory.New(healthzHandler{hs: hs}) hsTest := &serverTest{ @@ -479,7 +479,7 @@ func TestHealthzServer(t *testing.T) { tracking503: 0, } - testProxierHealthUpdater(hs, hsTest, fakeClock, t) + testProxyHealthUpdater(hs, hsTest, fakeClock, t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) @@ -504,7 +504,7 @@ func TestLivezServer(t *testing.T) { httpFactory := newFakeHTTPServerFactory() fakeClock := testingclock.NewFakeClock(time.Now()) - hs := newProxierHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) + hs := newProxyHealthServer(listener, httpFactory, fakeClock, "127.0.0.1:10256", 10*time.Second) server := hs.httpFactory.New(livezHandler{hs: hs}) hsTest := &serverTest{ @@ -514,7 +514,7 @@ func TestLivezServer(t *testing.T) { tracking503: 0, } - testProxierHealthUpdater(hs, hsTest, fakeClock, t) + testProxyHealthUpdater(hs, hsTest, fakeClock, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) @@ -540,7 +540,7 @@ var ( livezURL url = "/livez" ) -func testProxierHealthUpdater(hs *ProxierHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, t *testing.T) { +func testProxyHealthUpdater(hs *ProxyHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, t *testing.T) { // Should return 200 "OK" by default. testHTTPHandler(hsTest, http.StatusOK, t) @@ -659,7 +659,7 @@ func testMetricEquals(metric basemetrics.CounterMetric, expected float64, t *tes func TestServerWithSelectiveListeningAddress(t *testing.T) { listener := newFakeListener() httpFactory := newFakeHTTPServerFactory() - proxyChecker := &fakeProxierHealthChecker{true} + proxyChecker := &fakeProxyHealthChecker{true} // limiting addresses to loop back. We don't want any cleverness here around getting IP for // machine nor testing ipv6 || ipv4. using loop back guarantees the test will work on any machine diff --git a/pkg/proxy/healthcheck/proxier_health.go b/pkg/proxy/healthcheck/proxier_health.go index a76437189bd..9fbe4fc0331 100644 --- a/pkg/proxy/healthcheck/proxier_health.go +++ b/pkg/proxy/healthcheck/proxier_health.go @@ -35,14 +35,14 @@ const ( ToBeDeletedTaint = "ToBeDeletedByClusterAutoscaler" ) -// ProxierHealthServer allows callers to: +// ProxyHealthServer allows callers to: // 1. run a http server with /healthz and /livez endpoint handlers. // 2. update healthz timestamps before and after synchronizing dataplane. // 3. sync node status, for reporting unhealthy /healthz response // if the node is marked for deletion by autoscaler. // 4. get proxy health by verifying that the delay between QueuedUpdate() // calls and Updated() calls exceeded healthTimeout or not. -type ProxierHealthServer struct { +type ProxyHealthServer struct { listener listener httpFactory httpServerFactory clock clock.Clock @@ -56,13 +56,13 @@ type ProxierHealthServer struct { nodeEligible bool } -// NewProxierHealthServer returns a proxier health http server. -func NewProxierHealthServer(addr string, healthTimeout time.Duration) *ProxierHealthServer { - return newProxierHealthServer(stdNetListener{}, stdHTTPServerFactory{}, clock.RealClock{}, addr, healthTimeout) +// NewProxyHealthServer returns a proxy health http server. +func NewProxyHealthServer(addr string, healthTimeout time.Duration) *ProxyHealthServer { + return newProxyHealthServer(stdNetListener{}, stdHTTPServerFactory{}, clock.RealClock{}, addr, healthTimeout) } -func newProxierHealthServer(listener listener, httpServerFactory httpServerFactory, c clock.Clock, addr string, healthTimeout time.Duration) *ProxierHealthServer { - return &ProxierHealthServer{ +func newProxyHealthServer(listener listener, httpServerFactory httpServerFactory, c clock.Clock, addr string, healthTimeout time.Duration) *ProxyHealthServer { + return &ProxyHealthServer{ listener: listener, httpFactory: httpServerFactory, clock: c, @@ -80,7 +80,7 @@ func newProxierHealthServer(listener listener, httpServerFactory httpServerFacto // Updated should be called when the proxier of the given IP family has successfully updated // the service rules to reflect the current state and should be considered healthy now. -func (hs *ProxierHealthServer) Updated(ipFamily v1.IPFamily) { +func (hs *ProxyHealthServer) Updated(ipFamily v1.IPFamily) { hs.lock.Lock() defer hs.lock.Unlock() delete(hs.oldestPendingQueuedMap, ipFamily) @@ -92,7 +92,7 @@ func (hs *ProxierHealthServer) Updated(ipFamily v1.IPFamily) { // indicates that the proxier for the given IP family has received changes but has not // yet pushed them to its backend. If the proxier does not call Updated within the // healthTimeout time then it will be considered unhealthy. -func (hs *ProxierHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { +func (hs *ProxyHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { hs.lock.Lock() defer hs.lock.Unlock() // Set oldestPendingQueuedMap[ipFamily] only if it's currently unset @@ -103,12 +103,12 @@ func (hs *ProxierHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { // IsHealthy returns only the proxier's health state, following the same // definition the HTTP server defines, but ignoring the state of the Node. -func (hs *ProxierHealthServer) IsHealthy() bool { +func (hs *ProxyHealthServer) IsHealthy() bool { isHealthy, _ := hs.isHealthy() return isHealthy } -func (hs *ProxierHealthServer) isHealthy() (bool, time.Time) { +func (hs *ProxyHealthServer) isHealthy() (bool, time.Time) { hs.lock.RLock() defer hs.lock.RUnlock() @@ -138,7 +138,7 @@ func (hs *ProxierHealthServer) isHealthy() (bool, time.Time) { // SyncNode syncs the node and determines if it is eligible or not. Eligible is // defined as being: not tainted by ToBeDeletedTaint and not deleted. -func (hs *ProxierHealthServer) SyncNode(node *v1.Node) { +func (hs *ProxyHealthServer) SyncNode(node *v1.Node) { hs.lock.Lock() defer hs.lock.Unlock() @@ -155,15 +155,15 @@ func (hs *ProxierHealthServer) SyncNode(node *v1.Node) { hs.nodeEligible = true } -// NodeEligible returns nodeEligible field of ProxierHealthServer. -func (hs *ProxierHealthServer) NodeEligible() bool { +// NodeEligible returns nodeEligible field of ProxyHealthServer. +func (hs *ProxyHealthServer) NodeEligible() bool { hs.lock.RLock() defer hs.lock.RUnlock() return hs.nodeEligible } // Run starts the healthz HTTP server and blocks until it exits. -func (hs *ProxierHealthServer) Run(ctx context.Context) error { +func (hs *ProxyHealthServer) Run(ctx context.Context) error { serveMux := http.NewServeMux() serveMux.Handle("/healthz", healthzHandler{hs: hs}) serveMux.Handle("/livez", livezHandler{hs: hs}) @@ -171,19 +171,19 @@ func (hs *ProxierHealthServer) Run(ctx context.Context) error { listener, err := hs.listener.Listen(ctx, hs.addr) if err != nil { - return fmt.Errorf("failed to start proxier healthz on %s: %v", hs.addr, err) + return fmt.Errorf("failed to start proxy healthz on %s: %v", hs.addr, err) } klog.V(3).InfoS("Starting healthz HTTP server", "address", hs.addr) if err := server.Serve(listener); err != nil { - return fmt.Errorf("proxier healthz closed with error: %v", err) + return fmt.Errorf("proxy healthz closed with error: %v", err) } return nil } type healthzHandler struct { - hs *ProxierHealthServer + hs *ProxyHealthServer } func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { @@ -211,7 +211,7 @@ func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { } type livezHandler struct { - hs *ProxierHealthServer + hs *ProxyHealthServer } func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { diff --git a/pkg/proxy/healthcheck/service_health.go b/pkg/proxy/healthcheck/service_health.go index 44eb8b687cf..e0894aae98a 100644 --- a/pkg/proxy/healthcheck/service_health.go +++ b/pkg/proxy/healthcheck/service_health.go @@ -52,13 +52,13 @@ type ServiceHealthServer interface { SyncEndpoints(newEndpoints map[types.NamespacedName]int) error } -type proxierHealthChecker interface { - // IsHealthy returns the proxier's health state, following the same +type proxyHealthChecker interface { + // IsHealthy returns the proxy's health state, following the same // definition the HTTP server defines. IsHealthy() bool } -func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxierHealthChecker) ServiceHealthServer { +func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { // It doesn't matter whether we listen on "0.0.0.0", "::", or ""; go // treats them all the same. nodeIPs := []net.IP{net.IPv4zero} @@ -84,7 +84,7 @@ func newServiceHealthServer(hostname string, recorder events.EventRecorder, list } // NewServiceHealthServer allocates a new service healthcheck server manager -func NewServiceHealthServer(hostname string, recorder events.EventRecorder, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxierHealthChecker) ServiceHealthServer { +func NewServiceHealthServer(hostname string, recorder events.EventRecorder, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { return newServiceHealthServer(hostname, recorder, stdNetListener{}, stdHTTPServerFactory{}, nodePortAddresses, healthzServer) } @@ -96,7 +96,7 @@ type server struct { listener listener httpFactory httpServerFactory - healthzServer proxierHealthChecker + healthzServer proxyHealthChecker lock sync.RWMutex services map[types.NamespacedName]*hcInstance diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index da61d131248..5ce65a618de 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -111,7 +111,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddresses []string, initOnly bool, ) (proxy.Provider, error) { @@ -177,7 +177,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer // Since converting probabilities (floats) to strings is expensive // and we are using only probabilities in the format of 1/n, we are @@ -239,7 +239,7 @@ func NewProxier(ctx context.Context, hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddressStrings []string, initOnly bool, ) (*Proxier, error) { diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index b3d0dd4f81b..11ec8c8d0d5 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -130,7 +130,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, scheduler string, nodePortAddresses []string, initOnly bool, @@ -212,7 +212,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer ipvsScheduler string // The following buffers are used to reuse memory and avoid allocations @@ -285,7 +285,7 @@ func NewProxier( hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, scheduler string, nodePortAddressStrings []string, initOnly bool, diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index db993dab5ad..2076bd8f7e3 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -116,7 +116,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddresses []string, initOnly bool, ) (proxy.Provider, error) { @@ -180,7 +180,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer // nodePortAddresses selects the interfaces where nodePort works. nodePortAddresses *proxyutil.NodePortAddresses @@ -222,7 +222,7 @@ func NewProxier(ctx context.Context, hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, nodePortAddressStrings []string, initOnly bool, ) (*Proxier, error) { diff --git a/pkg/proxy/node.go b/pkg/proxy/node.go index 0de896710c4..db92508fb4b 100644 --- a/pkg/proxy/node.go +++ b/pkg/proxy/node.go @@ -93,7 +93,7 @@ func (n *NodePodCIDRHandler) OnNodeSynced() {} // NodeEligibleHandler handles the life cycle of the Node's eligibility, as // determined by the health server for directing load balancer traffic. type NodeEligibleHandler struct { - HealthServer *healthcheck.ProxierHealthServer + HealthServer *healthcheck.ProxyHealthServer } var _ config.NodeHandler = &NodeEligibleHandler{} diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go index fbd3562e6fa..6b0c9de13f7 100644 --- a/pkg/proxy/winkernel/proxier.go +++ b/pkg/proxy/winkernel/proxier.go @@ -620,7 +620,7 @@ type Proxier struct { recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer - healthzServer *healthcheck.ProxierHealthServer + healthzServer *healthcheck.ProxyHealthServer hns HostNetworkService hcn HcnService @@ -676,7 +676,7 @@ func NewProxier( hostname string, nodeIP net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, healthzBindAddress string, config config.KubeProxyWinkernelConfiguration, ) (*Proxier, error) { @@ -813,7 +813,7 @@ func NewDualStackProxier( hostname string, nodeIPs map[v1.IPFamily]net.IP, recorder events.EventRecorder, - healthzServer *healthcheck.ProxierHealthServer, + healthzServer *healthcheck.ProxyHealthServer, healthzBindAddress string, config config.KubeProxyWinkernelConfiguration, ) (proxy.Provider, error) { From 0645f0e50ec848af731c8f5a33b86f9e1aa39879 Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Thu, 19 Dec 2024 15:29:40 +0530 Subject: [PATCH 2/6] pkg/proxy/healthcheck: file rename Signed-off-by: Daman Arora --- .../healthcheck/{proxier_health.go => proxy_health.go} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename pkg/proxy/healthcheck/{proxier_health.go => proxy_health.go} (97%) diff --git a/pkg/proxy/healthcheck/proxier_health.go b/pkg/proxy/healthcheck/proxy_health.go similarity index 97% rename from pkg/proxy/healthcheck/proxier_health.go rename to pkg/proxy/healthcheck/proxy_health.go index 9fbe4fc0331..bb61adf22dc 100644 --- a/pkg/proxy/healthcheck/proxier_health.go +++ b/pkg/proxy/healthcheck/proxy_health.go @@ -171,13 +171,13 @@ func (hs *ProxyHealthServer) Run(ctx context.Context) error { listener, err := hs.listener.Listen(ctx, hs.addr) if err != nil { - return fmt.Errorf("failed to start proxy healthz on %s: %v", hs.addr, err) + return fmt.Errorf("failed to start proxy healthz on %s: %w", hs.addr, err) } klog.V(3).InfoS("Starting healthz HTTP server", "address", hs.addr) if err := server.Serve(listener); err != nil { - return fmt.Errorf("proxy healthz closed with error: %v", err) + return fmt.Errorf("proxy healthz closed with error: %w", err) } return nil } @@ -186,7 +186,7 @@ type healthzHandler struct { hs *ProxyHealthServer } -func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { +func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { nodeEligible := h.hs.NodeEligible() healthy, lastUpdated := h.hs.isHealthy() currentTime := h.hs.clock.Now() From 64aac665fd98e2995a4fdcf3628b8ed10c63e324 Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Fri, 10 Jan 2025 21:20:46 +0530 Subject: [PATCH 3/6] pkg/proxy/healthcheck: bug fix for last updated time The lastUpdated time returned by healthz call should be the latest lastUpdated time among the proxiers. Prior to this commit, if proxy is unhealthy, the returned lastUpdated time was lastUpdated time of the unhealthy proxier. Signed-off-by: Daman Arora --- pkg/proxy/healthcheck/proxy_health.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/proxy/healthcheck/proxy_health.go b/pkg/proxy/healthcheck/proxy_health.go index bb61adf22dc..652f7ef90dc 100644 --- a/pkg/proxy/healthcheck/proxy_health.go +++ b/pkg/proxy/healthcheck/proxy_health.go @@ -113,14 +113,14 @@ func (hs *ProxyHealthServer) isHealthy() (bool, time.Time) { defer hs.lock.RUnlock() var lastUpdated time.Time - currentTime := hs.clock.Now() - - for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { - + for _, proxierLastUpdated := range hs.lastUpdatedMap { if proxierLastUpdated.After(lastUpdated) { lastUpdated = proxierLastUpdated } + } + currentTime := hs.clock.Now() + for ipFamily, _ := range hs.lastUpdatedMap { if _, set := hs.oldestPendingQueuedMap[ipFamily]; !set { // the proxier is healthy while it's starting up // or the proxier is fully synced. @@ -131,7 +131,7 @@ func (hs *ProxyHealthServer) isHealthy() (bool, time.Time) { // there's an unprocessed update queued for this proxier, but it's not late yet. continue } - return false, proxierLastUpdated + return false, lastUpdated } return true, lastUpdated } From 1c1fc73616227df604819ec033f194a5d1c8606c Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Thu, 9 Jan 2025 12:13:40 +0530 Subject: [PATCH 4/6] pkg/proxy/healthcheck: enhance testing Signed-off-by: Daman Arora --- pkg/proxy/healthcheck/healthcheck_test.go | 132 +++++++++++++++++----- 1 file changed, 102 insertions(+), 30 deletions(-) diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index d1ff3e78cef..83c9708909b 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/dump" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/utils/ptr" basemetrics "k8s.io/component-base/metrics" "k8s.io/kubernetes/pkg/proxy/metrics" @@ -133,9 +134,9 @@ type hcPayload struct { } type healthzPayload struct { - LastUpdated string - CurrentTime string - NodeHealthy bool + LastUpdated string + CurrentTime string + NodeEligible *bool } type fakeProxyHealthChecker struct { @@ -479,23 +480,30 @@ func TestHealthzServer(t *testing.T) { tracking503: 0, } - testProxyHealthUpdater(hs, hsTest, fakeClock, t) + var expectedPayload healthzPayload + // testProxyHealthUpdater only asserts on proxy health, without considering node eligibility + // defaulting node health to true for testing. + testProxyHealthUpdater(hs, hsTest, fakeClock, ptr.To(true), t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(true)} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a ToBeDeletedTaint node hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(false)} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(true)} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a deleted node hs.SyncNode(makeNode(tweakDeleted())) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(false)} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) } func TestLivezServer(t *testing.T) { @@ -514,23 +522,30 @@ func TestLivezServer(t *testing.T) { tracking503: 0, } - testProxyHealthUpdater(hs, hsTest, fakeClock, t) + var expectedPayload healthzPayload + // /livez doesn't have a concept of "nodeEligible", keeping the value + // for node eligibility nil. + testProxyHealthUpdater(hs, hsTest, fakeClock, nil, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakDeleted())) - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } type url string @@ -540,78 +555,121 @@ var ( livezURL url = "/livez" ) -func testProxyHealthUpdater(hs *ProxyHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, t *testing.T) { +func testProxyHealthUpdater(hs *ProxyHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, nodeEligible *bool, t *testing.T) { + var expectedPayload healthzPayload + // lastUpdated is used to track the time whenever any of the proxier update is simulated, + // is used in assertion of the http response. + var lastUpdated time.Time + // Should return 200 "OK" by default. - testHTTPHandler(hsTest, http.StatusOK, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" after first update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) hs.Updated(v1.IPv6Protocol) - testHTTPHandler(hsTest, http.StatusOK, t) + lastUpdated = fakeClock.Now() + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should continue to return 200 "OK" as long as no further updates are queued for any proxier. fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv4 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) hs.Updated(v1.IPv6Protocol) + lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv6 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) hs.Updated(v1.IPv6Protocol) + lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if both IPv4 and IPv6 proxiers exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) hs.Updated(v1.IPv6Protocol) + lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) - testHTTPHandler(hsTest, http.StatusOK, t) + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If IPv6 proxier is late for an update but IPv4 proxier is not then updating IPv4 proxier should have no effect. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + lastUpdated = fakeClock.Now() + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv6Protocol) - testHTTPHandler(hsTest, http.StatusOK, t) + lastUpdated = fakeClock.Now() + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If both IPv4 and IPv6 proxiers are late for an update, we shouldn't report 200 "OK" until after both of them update. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) - testHTTPHandler(hsTest, http.StatusServiceUnavailable, t) + lastUpdated = fakeClock.Now() + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv6Protocol) - testHTTPHandler(hsTest, http.StatusOK, t) + lastUpdated = fakeClock.Now() + // for backward-compatibility returned last_updated is current_time when proxy is healthy, + // using fakeClock.Now().String() instead of lastUpdated.String() here. + expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } -func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) { +func testHTTPHandler(hsTest *serverTest, status int, expectedPayload healthzPayload, t *testing.T) { + t.Helper() handler := hsTest.server.(*fakeHTTPServer).handler req, err := http.NewRequest("GET", string(hsTest.url), nil) if err != nil { @@ -629,6 +687,14 @@ func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) { t.Fatal(err) } + // assert on payload + if payload.LastUpdated != expectedPayload.LastUpdated { + t.Errorf("expected last updated: %s; got: %s", expectedPayload.LastUpdated, payload.LastUpdated) + } + if payload.CurrentTime != expectedPayload.CurrentTime { + t.Errorf("expected current time: %s; got: %s", expectedPayload.CurrentTime, payload.CurrentTime) + } + if status == http.StatusOK { hsTest.tracking200++ } @@ -636,10 +702,16 @@ func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) { hsTest.tracking503++ } if hsTest.url == healthzURL { + if *payload.NodeEligible != *expectedPayload.NodeEligible { + t.Errorf("expected node eligible: %v; got: %v", *payload.NodeEligible, *expectedPayload.NodeEligible) + } testMetricEquals(metrics.ProxyHealthzTotal.WithLabelValues("200"), float64(hsTest.tracking200), t) testMetricEquals(metrics.ProxyHealthzTotal.WithLabelValues("503"), float64(hsTest.tracking503), t) } if hsTest.url == livezURL { + if payload.NodeEligible != nil { + t.Errorf("expected node eligible nil for %s response", livezURL) + } testMetricEquals(metrics.ProxyLivezTotal.WithLabelValues("200"), float64(hsTest.tracking200), t) testMetricEquals(metrics.ProxyLivezTotal.WithLabelValues("503"), float64(hsTest.tracking503), t) } From 3274dc40edc2aff7eb88dade16a05360f5bce261 Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Wed, 18 Dec 2024 18:23:03 +0530 Subject: [PATCH 5/6] pkg/proxy/healthcheck: consolidate IsHealthy and isHealthy Signed-off-by: Daman Arora --- pkg/proxy/healthcheck/healthcheck_test.go | 5 +++-- pkg/proxy/healthcheck/proxy_health.go | 14 ++++---------- pkg/proxy/healthcheck/service_health.go | 8 ++++---- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index 83c9708909b..9eb92004d58 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -143,8 +143,9 @@ type fakeProxyHealthChecker struct { healthy bool } -func (fake fakeProxyHealthChecker) IsHealthy() bool { - return fake.healthy +func (fake fakeProxyHealthChecker) Health() (bool, time.Time) { + // we only need "healthy" field for testing service healthchecks. + return fake.healthy, time.Time{} } func TestServer(t *testing.T) { diff --git a/pkg/proxy/healthcheck/proxy_health.go b/pkg/proxy/healthcheck/proxy_health.go index 652f7ef90dc..2039f801954 100644 --- a/pkg/proxy/healthcheck/proxy_health.go +++ b/pkg/proxy/healthcheck/proxy_health.go @@ -101,14 +101,8 @@ func (hs *ProxyHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { } } -// IsHealthy returns only the proxier's health state, following the same -// definition the HTTP server defines, but ignoring the state of the Node. -func (hs *ProxyHealthServer) IsHealthy() bool { - isHealthy, _ := hs.isHealthy() - return isHealthy -} - -func (hs *ProxyHealthServer) isHealthy() (bool, time.Time) { +// Health returns only the proxier's health state and last updated time. +func (hs *ProxyHealthServer) Health() (bool, time.Time) { hs.lock.RLock() defer hs.lock.RUnlock() @@ -188,7 +182,7 @@ type healthzHandler struct { func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { nodeEligible := h.hs.NodeEligible() - healthy, lastUpdated := h.hs.isHealthy() + healthy, lastUpdated := h.hs.Health() currentTime := h.hs.clock.Now() healthy = healthy && nodeEligible @@ -215,7 +209,7 @@ type livezHandler struct { } func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { - healthy, lastUpdated := h.hs.isHealthy() + healthy, lastUpdated := h.hs.Health() currentTime := h.hs.clock.Now() resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") diff --git a/pkg/proxy/healthcheck/service_health.go b/pkg/proxy/healthcheck/service_health.go index e0894aae98a..0f9eb300f00 100644 --- a/pkg/proxy/healthcheck/service_health.go +++ b/pkg/proxy/healthcheck/service_health.go @@ -24,6 +24,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/lithammer/dedent" @@ -53,9 +54,8 @@ type ServiceHealthServer interface { } type proxyHealthChecker interface { - // IsHealthy returns the proxy's health state, following the same - // definition the HTTP server defines. - IsHealthy() bool + // Health returns the proxy's health state and last updated time. + Health() (bool, time.Time) } func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { @@ -231,7 +231,7 @@ func (h hcHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { } count := svc.endpoints h.hcs.lock.RUnlock() - kubeProxyHealthy := h.hcs.healthzServer.IsHealthy() + kubeProxyHealthy, _ := h.hcs.healthzServer.Health() resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") From 271b8cf1c1e3e720558a04147688507ffefff406 Mon Sep 17 00:00:00 2001 From: Daman Arora Date: Sat, 11 Jan 2025 12:40:51 +0530 Subject: [PATCH 6/6] kube-proxy healthz handler ip family aware Signed-off-by: Daman Arora Co-authored-by: Antonio Ojea --- pkg/proxy/healthcheck/healthcheck_test.go | 296 ++++++++++++++++++---- pkg/proxy/healthcheck/proxy_health.go | 79 ++++-- pkg/proxy/healthcheck/service_health.go | 5 +- 3 files changed, 318 insertions(+), 62 deletions(-) diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index 9eb92004d58..e096d6fc34a 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -133,19 +133,13 @@ type hcPayload struct { ServiceProxyHealthy bool } -type healthzPayload struct { - LastUpdated string - CurrentTime string - NodeEligible *bool -} - type fakeProxyHealthChecker struct { healthy bool } -func (fake fakeProxyHealthChecker) Health() (bool, time.Time) { +func (fake fakeProxyHealthChecker) Health() ProxyHealth { // we only need "healthy" field for testing service healthchecks. - return fake.healthy, time.Time{} + return ProxyHealth{Healthy: fake.healthy} } func TestServer(t *testing.T) { @@ -481,29 +475,65 @@ func TestHealthzServer(t *testing.T) { tracking503: 0, } - var expectedPayload healthzPayload + var expectedPayload ProxyHealth // testProxyHealthUpdater only asserts on proxy health, without considering node eligibility // defaulting node health to true for testing. testProxyHealthUpdater(hs, hsTest, fakeClock, ptr.To(true), t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(true)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(true), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a ToBeDeletedTaint node hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(false)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(false), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" if we've synced a node, tainted in any other way hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(true)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(true), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if we've synced a deleted node hs.SyncNode(makeNode(tweakDeleted())) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: ptr.To(false)} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: ptr.To(false), + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) } @@ -523,29 +553,61 @@ func TestLivezServer(t *testing.T) { tracking503: 0, } - var expectedPayload healthzPayload + var expectedPayload ProxyHealth // /livez doesn't have a concept of "nodeEligible", keeping the value // for node eligibility nil. testProxyHealthUpdater(hs, hsTest, fakeClock, nil, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted(ToBeDeletedTaint))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakTainted("other"))) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" irrespective of node syncs hs.SyncNode(makeNode(tweakDeleted())) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String()} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + v1.IPv6Protocol: {LastUpdated: fakeClock.Now(), Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } @@ -557,119 +619,255 @@ var ( ) func testProxyHealthUpdater(hs *ProxyHealthServer, hsTest *serverTest, fakeClock *testingclock.FakeClock, nodeEligible *bool, t *testing.T) { - var expectedPayload healthzPayload + var expectedPayload ProxyHealth // lastUpdated is used to track the time whenever any of the proxier update is simulated, // is used in assertion of the http response. var lastUpdated time.Time + var ipv4LastUpdated time.Time + var ipv6LastUpdated time.Time // Should return 200 "OK" by default. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 200 "OK" after first update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should continue to return 200 "OK" as long as no further updates are queued for any proxier. fakeClock.Step(25 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv4 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if IPv6 proxier exceed max update-processing time. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // Should return 503 "ServiceUnavailable" if both IPv4 and IPv6 proxiers exceed max update-processing time. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) // Should return 200 "OK" after processing update for both IPv4 and IPv6 proxiers. hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() fakeClock.Step(5 * time.Second) // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If IPv6 proxier is late for an update but IPv4 proxier is not then updating IPv4 proxier should have no effect. hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) + ipv4LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv6Protocol) + ipv6LastUpdated = fakeClock.Now() lastUpdated = fakeClock.Now() // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) // If both IPv4 and IPv6 proxiers are late for an update, we shouldn't report 200 "OK" until after both of them update. hs.QueuedUpdate(v1.IPv4Protocol) hs.QueuedUpdate(v1.IPv6Protocol) fakeClock.Step(25 * time.Second) - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: lastUpdated, + Healthy: false, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: false}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: false}, + }, + } testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) hs.Updated(v1.IPv4Protocol) - lastUpdated = fakeClock.Now() - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: lastUpdated.String(), NodeEligible: nodeEligible} - testHTTPHandler(hsTest, http.StatusServiceUnavailable, expectedPayload, t) + ipv4LastUpdated = fakeClock.Now() hs.Updated(v1.IPv6Protocol) - lastUpdated = fakeClock.Now() + ipv6LastUpdated = fakeClock.Now() // for backward-compatibility returned last_updated is current_time when proxy is healthy, // using fakeClock.Now().String() instead of lastUpdated.String() here. - expectedPayload = healthzPayload{CurrentTime: fakeClock.Now().String(), LastUpdated: fakeClock.Now().String(), NodeEligible: nodeEligible} + expectedPayload = ProxyHealth{ + CurrentTime: fakeClock.Now(), + LastUpdated: fakeClock.Now(), + Healthy: true, + NodeEligible: nodeEligible, + Status: map[v1.IPFamily]ProxierHealth{ + v1.IPv4Protocol: {LastUpdated: ipv4LastUpdated, Healthy: true}, + v1.IPv6Protocol: {LastUpdated: ipv6LastUpdated, Healthy: true}, + }, + } testHTTPHandler(hsTest, http.StatusOK, expectedPayload, t) } -func testHTTPHandler(hsTest *serverTest, status int, expectedPayload healthzPayload, t *testing.T) { +func testHTTPHandler(hsTest *serverTest, status int, expectedPayload ProxyHealth, t *testing.T) { t.Helper() handler := hsTest.server.(*fakeHTTPServer).handler req, err := http.NewRequest("GET", string(hsTest.url), nil) @@ -683,17 +881,29 @@ func testHTTPHandler(hsTest *serverTest, status int, expectedPayload healthzPayl if resp.Code != status { t.Errorf("expected status code %v, got %v", status, resp.Code) } - var payload healthzPayload + var payload ProxyHealth if err := json.Unmarshal(resp.Body.Bytes(), &payload); err != nil { t.Fatal(err) } // assert on payload - if payload.LastUpdated != expectedPayload.LastUpdated { - t.Errorf("expected last updated: %s; got: %s", expectedPayload.LastUpdated, payload.LastUpdated) + if !payload.LastUpdated.Equal(expectedPayload.LastUpdated) { + t.Errorf("expected last updated: %s; got: %s", expectedPayload.LastUpdated.String(), payload.LastUpdated.String()) } - if payload.CurrentTime != expectedPayload.CurrentTime { - t.Errorf("expected current time: %s; got: %s", expectedPayload.CurrentTime, payload.CurrentTime) + if !payload.CurrentTime.Equal(expectedPayload.CurrentTime) { + t.Errorf("expected current time: %s; got: %s", expectedPayload.CurrentTime.String(), payload.CurrentTime.String()) + } + if payload.Healthy != expectedPayload.Healthy { + t.Errorf("expected healthy: %v, got: %v", expectedPayload.Healthy, payload.Healthy) + } + // assert on individual proxier response + for ipFamily := range payload.Status { + if payload.Status[ipFamily].Healthy != expectedPayload.Status[ipFamily].Healthy { + t.Errorf("expected healthy[%s]: %v, got: %v", ipFamily, expectedPayload.Status[ipFamily].Healthy, payload.Status[ipFamily].Healthy) + } + if !payload.Status[ipFamily].LastUpdated.Equal(expectedPayload.Status[ipFamily].LastUpdated) { + t.Errorf("expected last updated[%s]: %s; got: %s", ipFamily, expectedPayload.Status[ipFamily].LastUpdated.String(), payload.Status[ipFamily].LastUpdated.String()) + } } if status == http.StatusOK { diff --git a/pkg/proxy/healthcheck/proxy_health.go b/pkg/proxy/healthcheck/proxy_health.go index 2039f801954..9eec48833a9 100644 --- a/pkg/proxy/healthcheck/proxy_health.go +++ b/pkg/proxy/healthcheck/proxy_health.go @@ -18,6 +18,7 @@ package healthcheck import ( "context" + "encoding/json" "fmt" "net/http" "sync" @@ -27,6 +28,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/proxy/metrics" "k8s.io/utils/clock" + "k8s.io/utils/ptr" ) const ( @@ -35,6 +37,27 @@ const ( ToBeDeletedTaint = "ToBeDeletedByClusterAutoscaler" ) +// ProxierHealth represents the health of a proxier which operates on a single IP family. +type ProxierHealth struct { + LastUpdated time.Time `json:"lastUpdated"` + Healthy bool `json:"healthy"` +} + +// ProxyHealth represents the health of kube-proxy, embeds health of individual proxiers. +type ProxyHealth struct { + // LastUpdated is the last updated time of the proxier + // which was updated most recently. + // This is kept for backward-compatibility. + LastUpdated time.Time `json:"lastUpdated"` + CurrentTime time.Time `json:"currentTime"` + NodeEligible *bool `json:"nodeEligible,omitempty"` + // Healthy is true when all the proxiers are healthy, + // false otherwise. + Healthy bool `json:"healthy"` + // status of the health check per IP family + Status map[v1.IPFamily]ProxierHealth `json:"status,omitempty"` +} + // ProxyHealthServer allows callers to: // 1. run a http server with /healthz and /livez endpoint handlers. // 2. update healthz timestamps before and after synchronizing dataplane. @@ -101,20 +124,32 @@ func (hs *ProxyHealthServer) QueuedUpdate(ipFamily v1.IPFamily) { } } -// Health returns only the proxier's health state and last updated time. -func (hs *ProxyHealthServer) Health() (bool, time.Time) { +// Health returns proxy health status. +func (hs *ProxyHealthServer) Health() ProxyHealth { + var health = ProxyHealth{ + Healthy: true, + Status: make(map[v1.IPFamily]ProxierHealth), + } hs.lock.RLock() defer hs.lock.RUnlock() var lastUpdated time.Time - for _, proxierLastUpdated := range hs.lastUpdatedMap { + for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { if proxierLastUpdated.After(lastUpdated) { lastUpdated = proxierLastUpdated } + // initialize the health status of each proxier + // with healthy=true and the last updated time + // of the proxier. + health.Status[ipFamily] = ProxierHealth{ + LastUpdated: proxierLastUpdated, + Healthy: true, + } } currentTime := hs.clock.Now() - for ipFamily, _ := range hs.lastUpdatedMap { + health.CurrentTime = currentTime + for ipFamily, proxierLastUpdated := range hs.lastUpdatedMap { if _, set := hs.oldestPendingQueuedMap[ipFamily]; !set { // the proxier is healthy while it's starting up // or the proxier is fully synced. @@ -125,9 +160,16 @@ func (hs *ProxyHealthServer) Health() (bool, time.Time) { // there's an unprocessed update queued for this proxier, but it's not late yet. continue } - return false, lastUpdated + + // mark the status unhealthy. + health.Healthy = false + health.Status[ipFamily] = ProxierHealth{ + LastUpdated: proxierLastUpdated, + Healthy: false, + } } - return true, lastUpdated + health.LastUpdated = lastUpdated + return health } // SyncNode syncs the node and determines if it is eligible or not. Eligible is @@ -181,11 +223,13 @@ type healthzHandler struct { } func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { + health := h.hs.Health() nodeEligible := h.hs.NodeEligible() - healthy, lastUpdated := h.hs.Health() - currentTime := h.hs.clock.Now() + healthy := health.Healthy && nodeEligible + // updating the node eligibility here (outside of Health() call) as we only want responses + // of /healthz calls (not /livez) to have that. + health.NodeEligible = ptr.To(nodeEligible) - healthy = healthy && nodeEligible resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") if !healthy { @@ -199,9 +243,11 @@ func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, _ *http.Request) { // preserve compatibility, we use the same semantics: the returned // lastUpdated value is "recent" if the server is healthy. The kube-proxy // metrics provide more detailed information. - lastUpdated = currentTime + health.LastUpdated = h.hs.clock.Now() } - fmt.Fprintf(resp, `{"lastUpdated": %q,"currentTime": %q, "nodeEligible": %v}`, lastUpdated, currentTime, nodeEligible) + + output, _ := json.Marshal(health) + _, _ = fmt.Fprint(resp, string(output)) } type livezHandler struct { @@ -209,11 +255,11 @@ type livezHandler struct { } func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { - healthy, lastUpdated := h.hs.Health() - currentTime := h.hs.clock.Now() + health := h.hs.Health() + resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff") - if !healthy { + if !health.Healthy { metrics.ProxyLivezTotal.WithLabelValues("503").Inc() resp.WriteHeader(http.StatusServiceUnavailable) } else { @@ -224,7 +270,8 @@ func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { // preserve compatibility, we use the same semantics: the returned // lastUpdated value is "recent" if the server is healthy. The kube-proxy // metrics provide more detailed information. - lastUpdated = currentTime + health.LastUpdated = h.hs.clock.Now() } - fmt.Fprintf(resp, `{"lastUpdated": %q,"currentTime": %q}`, lastUpdated, currentTime) + output, _ := json.Marshal(health) + _, _ = fmt.Fprint(resp, string(output)) } diff --git a/pkg/proxy/healthcheck/service_health.go b/pkg/proxy/healthcheck/service_health.go index 0f9eb300f00..37a4ed7b6fa 100644 --- a/pkg/proxy/healthcheck/service_health.go +++ b/pkg/proxy/healthcheck/service_health.go @@ -24,7 +24,6 @@ import ( "strconv" "strings" "sync" - "time" "github.com/lithammer/dedent" @@ -55,7 +54,7 @@ type ServiceHealthServer interface { type proxyHealthChecker interface { // Health returns the proxy's health state and last updated time. - Health() (bool, time.Time) + Health() ProxyHealth } func newServiceHealthServer(hostname string, recorder events.EventRecorder, listener listener, factory httpServerFactory, nodePortAddresses *proxyutil.NodePortAddresses, healthzServer proxyHealthChecker) ServiceHealthServer { @@ -231,7 +230,7 @@ func (h hcHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { } count := svc.endpoints h.hcs.lock.RUnlock() - kubeProxyHealthy, _ := h.hcs.healthzServer.Health() + kubeProxyHealthy := h.hcs.healthzServer.Health().Healthy resp.Header().Set("Content-Type", "application/json") resp.Header().Set("X-Content-Type-Options", "nosniff")