Merge pull request #125901 from jralmaraz/kubelet_prober

Report event for the cases when probe returned Unknown result
This commit is contained in:
Kubernetes Prow Robot 2025-01-09 05:20:33 -08:00 committed by GitHub
commit f34d791b13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 158 additions and 35 deletions

View File

@ -98,24 +98,37 @@ func (pb *prober) probe(ctx context.Context, probeType probeType, pod *v1.Pod, s
} }
result, output, err := pb.runProbeWithRetries(ctx, probeType, probeSpec, pod, status, container, containerID, maxProbeRetries) result, output, err := pb.runProbeWithRetries(ctx, probeType, probeSpec, pod, status, container, containerID, maxProbeRetries)
if err != nil || (result != probe.Success && result != probe.Warning) {
// Probe failed in one way or another.
if err != nil { if err != nil {
klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) // Handle probe error
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored: %v", probeType, err) klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result)
} else { // result != probe.Success pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored and resulted in %s state: %s", probeType, result, err)
klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output)
}
return results.Failure, err return results.Failure, err
} }
if result == probe.Warning {
switch result {
case probe.Success:
klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
return results.Success, nil
case probe.Warning:
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerProbeWarning, "%s probe warning: %s", probeType, output) pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerProbeWarning, "%s probe warning: %s", probeType, output)
klog.V(3).InfoS("Probe succeeded with a warning", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "output", output) klog.V(3).InfoS("Probe succeeded with a warning", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "output", output)
} else {
klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
}
return results.Success, nil return results.Success, nil
case probe.Failure:
klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output)
return results.Failure, nil
case probe.Unknown:
klog.V(1).InfoS("Probe unknown without error", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result)
return results.Failure, nil
default:
klog.V(1).InfoS("Unsupported probe result", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result)
return results.Failure, nil
}
} }
// runProbeWithRetries tries to probe the container in a finite loop, it returns the last result // runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
@ -144,6 +157,8 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
case p.HTTPGet != nil: case p.HTTPGet != nil:
req, err := httpprobe.NewRequestForHTTPGetAction(p.HTTPGet, &container, status.PodIP, "probe") req, err := httpprobe.NewRequestForHTTPGetAction(p.HTTPGet, &container, status.PodIP, "probe")
if err != nil { if err != nil {
// Log and record event for Unknown result
klog.V(4).InfoS("HTTP-Probe failed to create request", "error", err)
return probe.Unknown, "", err return probe.Unknown, "", err
} }
if klogV4 := klog.V(4); klogV4.Enabled() { if klogV4 := klog.V(4); klogV4.Enabled() {
@ -152,13 +167,14 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
path := req.URL.Path path := req.URL.Path
scheme := req.URL.Scheme scheme := req.URL.Scheme
headers := p.HTTPGet.HTTPHeaders headers := p.HTTPGet.HTTPHeaders
klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers) klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers, "probeType", probeType)
} }
return pb.http.Probe(req, timeout) return pb.http.Probe(req, timeout)
case p.TCPSocket != nil: case p.TCPSocket != nil:
port, err := probe.ResolveContainerPort(p.TCPSocket.Port, &container) port, err := probe.ResolveContainerPort(p.TCPSocket.Port, &container)
if err != nil { if err != nil {
klog.V(4).InfoS("TCP-Probe failed to resolve port", "error", err)
return probe.Unknown, "", err return probe.Unknown, "", err
} }
host := p.TCPSocket.Host host := p.TCPSocket.Host
@ -178,7 +194,7 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
return pb.grpc.Probe(host, service, int(p.GRPC.Port), timeout) return pb.grpc.Probe(host, service, int(p.GRPC.Port), timeout)
default: default:
klog.InfoS("Failed to find probe builder for container", "containerName", container.Name) klog.V(4).InfoS("Failed to find probe builder for container", "containerName", container.Name)
return probe.Unknown, "", fmt.Errorf("missing probe handler for %s:%s", format.Pod(pod), container.Name) return probe.Unknown, "", fmt.Errorf("missing probe handler for %s:%s", format.Pod(pod), container.Name)
} }
} }

View File

@ -25,15 +25,20 @@ import (
"strings" "strings"
"testing" "testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record" "k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/util/ioutils" "k8s.io/kubernetes/pkg/kubelet/util/ioutils"
"k8s.io/kubernetes/pkg/probe" "k8s.io/kubernetes/pkg/probe"
execprobe "k8s.io/kubernetes/pkg/probe/exec" execprobe "k8s.io/kubernetes/pkg/probe/exec"
"k8s.io/kubernetes/test/utils/ktesting"
) )
func TestGetURLParts(t *testing.T) { func TestGetURLParts(t *testing.T) {
@ -141,6 +146,7 @@ func TestProbe(t *testing.T) {
Exec: &v1.ExecAction{}, Exec: &v1.ExecAction{},
}, },
} }
tests := []struct { tests := []struct {
probe *v1.Probe probe *v1.Probe
env []v1.EnvVar env []v1.EnvVar
@ -149,6 +155,7 @@ func TestProbe(t *testing.T) {
execResult probe.Result execResult probe.Result
expectedResult results.Result expectedResult results.Result
expectCommand []string expectCommand []string
unsupported bool
}{ }{
{ // No probe { // No probe
probe: nil, probe: nil,
@ -174,18 +181,25 @@ func TestProbe(t *testing.T) {
execResult: probe.Warning, execResult: probe.Warning,
expectedResult: results.Success, expectedResult: results.Success,
}, },
{ // Probe result is unknown { // Probe result is unknown with no error
probe: execProbe, probe: execProbe,
execResult: probe.Unknown, execResult: probe.Unknown,
expectError: false,
expectedResult: results.Failure, expectedResult: results.Failure,
}, },
{ // Probe has an error { // Probe result is unknown with an error
probe: execProbe, probe: execProbe,
execError: true, execError: true,
expectError: true, expectError: true,
execResult: probe.Unknown, execResult: probe.Unknown,
expectedResult: results.Failure, expectedResult: results.Failure,
}, },
{ // Unsupported probe type
probe: nil,
expectedResult: results.Failure,
expectError: true,
unsupported: true,
},
{ // Probe arguments are passed through { // Probe arguments are passed through
probe: &v1.Probe{ probe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{ ProbeHandler: v1.ProbeHandler{
@ -216,13 +230,17 @@ func TestProbe(t *testing.T) {
} }
for i, test := range tests { for i, test := range tests {
for _, probeType := range [...]probeType{liveness, readiness, startup} { for _, pType := range [...]probeType{liveness, readiness, startup} {
if test.unsupported {
pType = probeType(666)
}
prober := &prober{ prober := &prober{
recorder: &record.FakeRecorder{}, recorder: &record.FakeRecorder{},
} }
testID := fmt.Sprintf("%d-%s", i, probeType) testID := fmt.Sprintf("%d-%s", i, pType)
testContainer := v1.Container{Env: test.env} testContainer := v1.Container{Env: test.env}
switch probeType { switch pType {
case liveness: case liveness:
testContainer.LivenessProbe = test.probe testContainer.LivenessProbe = test.probe
case readiness: case readiness:
@ -236,25 +254,22 @@ func TestProbe(t *testing.T) {
prober.exec = fakeExecProber{test.execResult, nil} prober.exec = fakeExecProber{test.execResult, nil}
} }
result, err := prober.probe(ctx, probeType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID) result, err := prober.probe(ctx, pType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID)
if test.expectError && err == nil {
t.Errorf("[%s] Expected probe error but no error was returned.", testID) if test.expectError {
} require.Error(t, err, "[%s] Expected probe error but no error was returned.", testID)
if !test.expectError && err != nil { } else {
t.Errorf("[%s] Didn't expect probe error but got: %v", testID, err) require.NoError(t, err, "[%s] Didn't expect probe error", testID)
}
if test.expectedResult != result {
t.Errorf("[%s] Expected result to be %v but was %v", testID, test.expectedResult, result)
} }
require.Equal(t, test.expectedResult, result, "[%s] Expected result to be %v but was %v", testID, test.expectedResult, result)
if len(test.expectCommand) > 0 { if len(test.expectCommand) > 0 {
prober.exec = execprobe.New() prober.exec = execprobe.New()
prober.runner = &containertest.FakeContainerCommandRunner{} prober.runner = &containertest.FakeContainerCommandRunner{}
_, err := prober.probe(ctx, probeType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID) _, err := prober.probe(ctx, pType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID)
if err != nil { require.NoError(t, err, "[%s] Didn't expect probe error ", testID)
t.Errorf("[%s] Didn't expect probe error but got: %v", testID, err)
continue
}
if !reflect.DeepEqual(test.expectCommand, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd) { if !reflect.DeepEqual(test.expectCommand, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd) {
t.Errorf("[%s] unexpected probe arguments: %v", testID, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd) t.Errorf("[%s] unexpected probe arguments: %v", testID, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd)
} }
@ -264,7 +279,7 @@ func TestProbe(t *testing.T) {
} }
func TestNewExecInContainer(t *testing.T) { func TestNewExecInContainer(t *testing.T) {
ctx := context.Background() ctx := ktesting.Init(t)
limit := 1024 limit := 1024
tenKilobyte := strings.Repeat("logs-123", 128*10) tenKilobyte := strings.Repeat("logs-123", 128*10)
@ -333,3 +348,95 @@ func TestNewExecInContainer(t *testing.T) {
} }
} }
} }
// TestNewProber verifies that newProber stores the runner and recorder it is
// given and that the exec, http, tcp and grpc probe fields are populated.
func TestNewProber(t *testing.T) {
	runner := &containertest.FakeContainerCommandRunner{}
	recorder := &record.FakeRecorder{}

	pb := newProber(runner, recorder)
	// Stop here on a nil prober: the field checks below dereference it and
	// would panic instead of reporting a clean test failure.
	if !assert.NotNil(t, pb, "Expected prober to be non-nil") {
		return
	}

	assert.Equal(t, runner, pb.runner, "Expected prober runner to match")
	assert.Equal(t, recorder, pb.recorder, "Expected prober recorder to match")

	assert.NotNil(t, pb.exec, "exec probe initialized")
	assert.NotNil(t, pb.http, "http probe initialized")
	assert.NotNil(t, pb.tcp, "tcp probe initialized")
	assert.NotNil(t, pb.grpc, "grpc probe initialized")
}
// TestRecordContainerEventUnknownStatus calls recordContainerEvent twice per
// probe type (a probe warning carrying the probe output, then an "Unknown
// probe status" message) and checks that the fake recorder received exactly
// the expected event strings, in order.
func TestRecordContainerEventUnknownStatus(t *testing.T) {
	// NOTE(review): presumably needed so an object reference can be built for
	// the pod when the event is recorded — confirm against recordContainerEvent.
	err := v1.AddToScheme(legacyscheme.Scheme)
	require.NoError(t, err, "failed to add v1 to scheme")

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			UID: "test-probe-pod",
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name: "test-probe-container",
				},
			},
		},
	}
	container := pod.Spec.Containers[0]
	output := "probe output"

	testCases := []struct {
		name      string
		probeType probeType
		result    probe.Result
		expected  []string
	}{
		{
			name:      "Readiness Probe Unknown",
			probeType: readiness,
			result:    probe.Unknown,
			expected: []string{
				"Warning ContainerProbeWarning Readiness probe warning: probe output",
				"Warning ContainerProbeWarning Unknown Readiness probe status: unknown",
			},
		},
		{
			name:      "Liveness Probe Unknown",
			probeType: liveness,
			result:    probe.Unknown,
			expected: []string{
				"Warning ContainerProbeWarning Liveness probe warning: probe output",
				"Warning ContainerProbeWarning Unknown Liveness probe status: unknown",
			},
		},
		{
			name:      "Startup Probe Unknown",
			probeType: startup,
			result:    probe.Unknown,
			expected: []string{
				"Warning ContainerProbeWarning Startup probe warning: probe output",
				"Warning ContainerProbeWarning Unknown Startup probe status: unknown",
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// One slot more than the expected number of events.
			bufferSize := len(tc.expected) + 1
			fakeRecorder := record.NewFakeRecorder(bufferSize)

			pb := &prober{
				recorder: fakeRecorder,
			}

			pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, "ContainerProbeWarning", "%s probe warning: %s", tc.probeType, output)
			pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, "ContainerProbeWarning", "Unknown %s probe status: %s", tc.probeType, tc.result)

			// Must be fatal: the receives below block forever if fewer events
			// were recorded than expected, which would hang the test instead
			// of failing it.
			require.Len(t, fakeRecorder.Events, len(tc.expected), "unexpected number of events")

			for _, expected := range tc.expected {
				assert.Equal(t, expected, <-fakeRecorder.Events)
			}
		})
	}
}