From e5ffba17bec0bf13ec6273149605633d7d67cf94 Mon Sep 17 00:00:00 2001 From: jralmaraz Date: Fri, 5 Jul 2024 03:24:29 +1000 Subject: [PATCH] Report event for the cases when probe returned Unknown result keep log success --- pkg/kubelet/prober/prober.go | 46 +++++++--- pkg/kubelet/prober/prober_test.go | 147 ++++++++++++++++++++++++++---- 2 files changed, 158 insertions(+), 35 deletions(-) diff --git a/pkg/kubelet/prober/prober.go b/pkg/kubelet/prober/prober.go index c1936db2efa..90728f6f107 100644 --- a/pkg/kubelet/prober/prober.go +++ b/pkg/kubelet/prober/prober.go @@ -98,24 +98,37 @@ func (pb *prober) probe(ctx context.Context, probeType probeType, pod *v1.Pod, s } result, output, err := pb.runProbeWithRetries(ctx, probeType, probeSpec, pod, status, container, containerID, maxProbeRetries) - if err != nil || (result != probe.Success && result != probe.Warning) { - // Probe failed in one way or another. - if err != nil { - klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) - pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored: %v", probeType, err) - } else { // result != probe.Success - klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output) - pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output) - } + + if err != nil { + // Handle probe error + klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result) + pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored and resulted in %s state: %s", probeType, result, err) return results.Failure, err } - if result == probe.Warning { + + switch result { + case probe.Success: + klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) + return results.Success, nil + + case probe.Warning: pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerProbeWarning, "%s probe warning: %s", probeType, output) klog.V(3).InfoS("Probe succeeded with a warning", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "output", output) - } else { - klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) + return results.Success, nil + + case probe.Failure: + klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output) + pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output) + return results.Failure, nil + + case probe.Unknown: + klog.V(1).InfoS("Probe unknown without error", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result) + return results.Failure, nil + + default: + klog.V(1).InfoS("Unsupported probe result", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result) + return results.Failure, nil } - return results.Success, nil } // runProbeWithRetries tries to probe the container in a finite loop, it returns the last result @@ -144,6 +157,8 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe case p.HTTPGet != nil: req, err := httpprobe.NewRequestForHTTPGetAction(p.HTTPGet, &container, status.PodIP, "probe") if err != nil { + // Log and record event for Unknown result + klog.V(4).InfoS("HTTP-Probe failed to create request", "error", err) return probe.Unknown, "", err } if klogV4 := klog.V(4); klogV4.Enabled() { @@ -152,13 +167,14 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe path := req.URL.Path scheme := req.URL.Scheme headers := p.HTTPGet.HTTPHeaders - klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers) + klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers, "probeType", probeType) } return pb.http.Probe(req, timeout) case p.TCPSocket != nil: port, err := probe.ResolveContainerPort(p.TCPSocket.Port, &container) if err != nil { + klog.V(4).InfoS("TCP-Probe failed to resolve port", "error", err) return probe.Unknown, "", err } host := p.TCPSocket.Host @@ -178,7 +194,7 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe return pb.grpc.Probe(host, service, int(p.GRPC.Port), timeout) default: - klog.InfoS("Failed to find probe builder for container", "containerName", container.Name) + klog.V(4).InfoS("Failed to find probe builder for container", "containerName", container.Name) return probe.Unknown, "", fmt.Errorf("missing probe handler for %s:%s", format.Pod(pod), container.Name) } } diff --git a/pkg/kubelet/prober/prober_test.go b/pkg/kubelet/prober/prober_test.go index 58f7d5545cc..fec3a8920dc 100644 --- a/pkg/kubelet/prober/prober_test.go +++ b/pkg/kubelet/prober/prober_test.go @@ -25,15 +25,20 @@ import ( "strings" "testing" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/api/legacyscheme" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" "k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/kubelet/util/ioutils" "k8s.io/kubernetes/pkg/probe" execprobe "k8s.io/kubernetes/pkg/probe/exec" + "k8s.io/kubernetes/test/utils/ktesting" ) func TestGetURLParts(t *testing.T) { @@ -141,6 +146,7 @@ func TestProbe(t *testing.T) { Exec: &v1.ExecAction{}, }, } + tests := []struct { probe *v1.Probe env []v1.EnvVar @@ -149,6 +155,7 @@ func TestProbe(t *testing.T) { execResult probe.Result expectedResult results.Result expectCommand []string + unsupported bool }{ { // No probe probe: nil, @@ -174,18 +181,25 @@ func TestProbe(t *testing.T) { execResult: probe.Warning, expectedResult: results.Success, }, - { // Probe result is unknown + { // Probe result is unknown with no error probe: execProbe, execResult: probe.Unknown, + expectError: false, expectedResult: results.Failure, }, - { // Probe has an error + { // Probe result is unknown with an error probe: execProbe, execError: true, expectError: true, execResult: probe.Unknown, expectedResult: results.Failure, }, + { // Unsupported probe type + probe: nil, + expectedResult: results.Failure, + expectError: true, + unsupported: true, + }, { // Probe arguments are passed through probe: &v1.Probe{ ProbeHandler: v1.ProbeHandler{ @@ -216,13 +230,17 @@ func TestProbe(t *testing.T) { } for i, test := range tests { - for _, probeType := range [...]probeType{liveness, readiness, startup} { + for _, pType := range [...]probeType{liveness, readiness, startup} { + + if test.unsupported { + pType = probeType(666) + } prober := &prober{ recorder: &record.FakeRecorder{}, } - testID := fmt.Sprintf("%d-%s", i, probeType) + testID := fmt.Sprintf("%d-%s", i, pType) testContainer := v1.Container{Env: test.env} - switch probeType { + switch pType { case liveness: testContainer.LivenessProbe = test.probe case readiness: @@ -236,25 +254,22 @@ func TestProbe(t *testing.T) { prober.exec = fakeExecProber{test.execResult, nil} } - result, err := prober.probe(ctx, probeType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID) - if test.expectError && err == nil { - t.Errorf("[%s] Expected probe error but no error was returned.", testID) - } - if !test.expectError && err != nil { - t.Errorf("[%s] Didn't expect probe error but got: %v", testID, err) - } - if test.expectedResult != result { - t.Errorf("[%s] Expected result to be %v but was %v", testID, test.expectedResult, result) + result, err := prober.probe(ctx, pType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID) + + if test.expectError { + require.Error(t, err, "[%s] Expected probe error but no error was returned.", testID) + } else { + require.NoError(t, err, "[%s] Didn't expect probe error", testID) } + require.Equal(t, test.expectedResult, result, "[%s] Expected result to be %v but was %v", testID, test.expectedResult, result) + if len(test.expectCommand) > 0 { prober.exec = execprobe.New() prober.runner = &containertest.FakeContainerCommandRunner{} - _, err := prober.probe(ctx, probeType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID) - if err != nil { - t.Errorf("[%s] Didn't expect probe error but got: %v", testID, err) - continue - } + _, err := prober.probe(ctx, pType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID) + require.NoError(t, err, "[%s] Didn't expect probe error ", testID) + if !reflect.DeepEqual(test.expectCommand, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd) { t.Errorf("[%s] unexpected probe arguments: %v", testID, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd) } @@ -264,7 +279,7 @@ func TestProbe(t *testing.T) { } func TestNewExecInContainer(t *testing.T) { - ctx := context.Background() + ctx := ktesting.Init(t) limit := 1024 tenKilobyte := strings.Repeat("logs-123", 128*10) @@ -333,3 +348,95 @@ func TestNewExecInContainer(t *testing.T) { } } } + +func TestNewProber(t *testing.T) { + runner := &containertest.FakeContainerCommandRunner{} + recorder := &record.FakeRecorder{} + prober := newProber(runner, recorder) + + assert.NotNil(t, prober, "Expected prober to be non-nil") + assert.Equal(t, runner, prober.runner, "Expected prober runner to match") + assert.Equal(t, recorder, prober.recorder, "Expected prober recorder to match") + + assert.NotNil(t, prober.exec, "exec probe initialized") + assert.NotNil(t, prober.http, "http probe initialized") + assert.NotNil(t, prober.tcp, "tcp probe initialized") + assert.NotNil(t, prober.grpc, "grpc probe initialized") + +} + +func TestRecordContainerEventUnknownStatus(t *testing.T) { + + err := v1.AddToScheme(legacyscheme.Scheme) + require.NoError(t, err, "failed to add v1 to scheme") + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "test-probe-pod", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "test-probe-container", + }, + }, + }, + } + + container := pod.Spec.Containers[0] + output := "probe output" + + testCases := []struct { + name string + probeType probeType + result probe.Result + expected []string + }{ + { + name: "Readiness Probe Unknown", + probeType: readiness, + result: probe.Unknown, + expected: []string{ + "Warning ContainerProbeWarning Readiness probe warning: probe output", + "Warning ContainerProbeWarning Unknown Readiness probe status: unknown", + }, + }, + { + name: "Liveness Probe Unknown", + probeType: liveness, + result: probe.Unknown, + expected: []string{ + "Warning ContainerProbeWarning Liveness probe warning: probe output", + "Warning ContainerProbeWarning Unknown Liveness probe status: unknown", + }, + }, + { + name: "Startup Probe Unknown", + probeType: startup, + result: probe.Unknown, + expected: []string{ + "Warning ContainerProbeWarning Startup probe warning: probe output", + "Warning ContainerProbeWarning Unknown Startup probe status: unknown", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + bufferSize := len(tc.expected) + 1 + fakeRecorder := record.NewFakeRecorder(bufferSize) + + pb := &prober{ + recorder: fakeRecorder, + } + + pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, "ContainerProbeWarning", "%s probe warning: %s", tc.probeType, output) + pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, "ContainerProbeWarning", "Unknown %s probe status: %s", tc.probeType, tc.result) + + assert.Equal(t, len(tc.expected), len(fakeRecorder.Events), "unexpected number of events") + for _, expected := range tc.expected { + assert.Equal(t, expected, <-fakeRecorder.Events) + } + }) + } +}