Merge pull request #125901 from jralmaraz/kubelet_prober

Report event for the cases when probe returned Unknown result
This commit is contained in:
Kubernetes Prow Robot 2025-01-09 05:20:33 -08:00 committed by GitHub
commit f34d791b13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 158 additions and 35 deletions

View File

@ -98,24 +98,37 @@ func (pb *prober) probe(ctx context.Context, probeType probeType, pod *v1.Pod, s
}
result, output, err := pb.runProbeWithRetries(ctx, probeType, probeSpec, pod, status, container, containerID, maxProbeRetries)
if err != nil || (result != probe.Success && result != probe.Warning) {
// Probe failed in one way or another.
if err != nil {
klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored: %v", probeType, err)
} else { // result != probe.Success
klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output)
}
if err != nil {
// Handle probe error
klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored and resulted in %s state: %s", probeType, result, err)
return results.Failure, err
}
if result == probe.Warning {
switch result {
case probe.Success:
klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
return results.Success, nil
case probe.Warning:
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerProbeWarning, "%s probe warning: %s", probeType, output)
klog.V(3).InfoS("Probe succeeded with a warning", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "output", output)
} else {
klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
return results.Success, nil
case probe.Failure:
klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output)
return results.Failure, nil
case probe.Unknown:
klog.V(1).InfoS("Probe unknown without error", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result)
return results.Failure, nil
default:
klog.V(1).InfoS("Unsupported probe result", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result)
return results.Failure, nil
}
return results.Success, nil
}
// runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
@ -144,6 +157,8 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
case p.HTTPGet != nil:
req, err := httpprobe.NewRequestForHTTPGetAction(p.HTTPGet, &container, status.PodIP, "probe")
if err != nil {
// Log and record event for Unknown result
klog.V(4).InfoS("HTTP-Probe failed to create request", "error", err)
return probe.Unknown, "", err
}
if klogV4 := klog.V(4); klogV4.Enabled() {
@ -152,13 +167,14 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
path := req.URL.Path
scheme := req.URL.Scheme
headers := p.HTTPGet.HTTPHeaders
klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers)
klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers, "probeType", probeType)
}
return pb.http.Probe(req, timeout)
case p.TCPSocket != nil:
port, err := probe.ResolveContainerPort(p.TCPSocket.Port, &container)
if err != nil {
klog.V(4).InfoS("TCP-Probe failed to resolve port", "error", err)
return probe.Unknown, "", err
}
host := p.TCPSocket.Host
@ -178,7 +194,7 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
return pb.grpc.Probe(host, service, int(p.GRPC.Port), timeout)
default:
klog.InfoS("Failed to find probe builder for container", "containerName", container.Name)
klog.V(4).InfoS("Failed to find probe builder for container", "containerName", container.Name)
return probe.Unknown, "", fmt.Errorf("missing probe handler for %s:%s", format.Pod(pod), container.Name)
}
}

View File

@ -25,15 +25,20 @@ import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
"k8s.io/kubernetes/pkg/probe"
execprobe "k8s.io/kubernetes/pkg/probe/exec"
"k8s.io/kubernetes/test/utils/ktesting"
)
func TestGetURLParts(t *testing.T) {
@ -141,6 +146,7 @@ func TestProbe(t *testing.T) {
Exec: &v1.ExecAction{},
},
}
tests := []struct {
probe *v1.Probe
env []v1.EnvVar
@ -149,6 +155,7 @@ func TestProbe(t *testing.T) {
execResult probe.Result
expectedResult results.Result
expectCommand []string
unsupported bool
}{
{ // No probe
probe: nil,
@ -174,18 +181,25 @@ func TestProbe(t *testing.T) {
execResult: probe.Warning,
expectedResult: results.Success,
},
{ // Probe result is unknown
{ // Probe result is unknown with no error
probe: execProbe,
execResult: probe.Unknown,
expectError: false,
expectedResult: results.Failure,
},
{ // Probe has an error
{ // Probe result is unknown with an error
probe: execProbe,
execError: true,
expectError: true,
execResult: probe.Unknown,
expectedResult: results.Failure,
},
{ // Unsupported probe type
probe: nil,
expectedResult: results.Failure,
expectError: true,
unsupported: true,
},
{ // Probe arguments are passed through
probe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
@ -216,13 +230,17 @@ func TestProbe(t *testing.T) {
}
for i, test := range tests {
for _, probeType := range [...]probeType{liveness, readiness, startup} {
for _, pType := range [...]probeType{liveness, readiness, startup} {
if test.unsupported {
pType = probeType(666)
}
prober := &prober{
recorder: &record.FakeRecorder{},
}
testID := fmt.Sprintf("%d-%s", i, probeType)
testID := fmt.Sprintf("%d-%s", i, pType)
testContainer := v1.Container{Env: test.env}
switch probeType {
switch pType {
case liveness:
testContainer.LivenessProbe = test.probe
case readiness:
@ -236,25 +254,22 @@ func TestProbe(t *testing.T) {
prober.exec = fakeExecProber{test.execResult, nil}
}
result, err := prober.probe(ctx, probeType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID)
if test.expectError && err == nil {
t.Errorf("[%s] Expected probe error but no error was returned.", testID)
}
if !test.expectError && err != nil {
t.Errorf("[%s] Didn't expect probe error but got: %v", testID, err)
}
if test.expectedResult != result {
t.Errorf("[%s] Expected result to be %v but was %v", testID, test.expectedResult, result)
result, err := prober.probe(ctx, pType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID)
if test.expectError {
require.Error(t, err, "[%s] Expected probe error but no error was returned.", testID)
} else {
require.NoError(t, err, "[%s] Didn't expect probe error", testID)
}
require.Equal(t, test.expectedResult, result, "[%s] Expected result to be %v but was %v", testID, test.expectedResult, result)
if len(test.expectCommand) > 0 {
prober.exec = execprobe.New()
prober.runner = &containertest.FakeContainerCommandRunner{}
_, err := prober.probe(ctx, probeType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID)
if err != nil {
t.Errorf("[%s] Didn't expect probe error but got: %v", testID, err)
continue
}
_, err := prober.probe(ctx, pType, &v1.Pod{}, v1.PodStatus{}, testContainer, containerID)
require.NoError(t, err, "[%s] Didn't expect probe error ", testID)
if !reflect.DeepEqual(test.expectCommand, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd) {
t.Errorf("[%s] unexpected probe arguments: %v", testID, prober.runner.(*containertest.FakeContainerCommandRunner).Cmd)
}
@ -264,7 +279,7 @@ func TestProbe(t *testing.T) {
}
func TestNewExecInContainer(t *testing.T) {
ctx := context.Background()
ctx := ktesting.Init(t)
limit := 1024
tenKilobyte := strings.Repeat("logs-123", 128*10)
@ -333,3 +348,95 @@ func TestNewExecInContainer(t *testing.T) {
}
}
}
func TestNewProber(t *testing.T) {
runner := &containertest.FakeContainerCommandRunner{}
recorder := &record.FakeRecorder{}
prober := newProber(runner, recorder)
assert.NotNil(t, prober, "Expected prober to be non-nil")
assert.Equal(t, runner, prober.runner, "Expected prober runner to match")
assert.Equal(t, recorder, prober.recorder, "Expected prober recorder to match")
assert.NotNil(t, prober.exec, "exec probe initialized")
assert.NotNil(t, prober.http, "http probe initialized")
assert.NotNil(t, prober.tcp, "tcp probe initialized")
assert.NotNil(t, prober.grpc, "grpc probe initialized")
}
func TestRecordContainerEventUnknownStatus(t *testing.T) {
err := v1.AddToScheme(legacyscheme.Scheme)
require.NoError(t, err, "failed to add v1 to scheme")
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "test-probe-pod",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "test-probe-container",
},
},
},
}
container := pod.Spec.Containers[0]
output := "probe output"
testCases := []struct {
name string
probeType probeType
result probe.Result
expected []string
}{
{
name: "Readiness Probe Unknown",
probeType: readiness,
result: probe.Unknown,
expected: []string{
"Warning ContainerProbeWarning Readiness probe warning: probe output",
"Warning ContainerProbeWarning Unknown Readiness probe status: unknown",
},
},
{
name: "Liveness Probe Unknown",
probeType: liveness,
result: probe.Unknown,
expected: []string{
"Warning ContainerProbeWarning Liveness probe warning: probe output",
"Warning ContainerProbeWarning Unknown Liveness probe status: unknown",
},
},
{
name: "Startup Probe Unknown",
probeType: startup,
result: probe.Unknown,
expected: []string{
"Warning ContainerProbeWarning Startup probe warning: probe output",
"Warning ContainerProbeWarning Unknown Startup probe status: unknown",
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
bufferSize := len(tc.expected) + 1
fakeRecorder := record.NewFakeRecorder(bufferSize)
pb := &prober{
recorder: fakeRecorder,
}
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, "ContainerProbeWarning", "%s probe warning: %s", tc.probeType, output)
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, "ContainerProbeWarning", "Unknown %s probe status: %s", tc.probeType, tc.result)
assert.Equal(t, len(tc.expected), len(fakeRecorder.Events), "unexpected number of events")
for _, expected := range tc.expected {
assert.Equal(t, expected, <-fakeRecorder.Events)
}
})
}
}