Merge pull request #128998 from bart0sh/PR165-migrate-oom-to-contextual-logging

kubelet: Migrate pkg/kubelet/oom to contextual logging
This commit is contained in:
Kubernetes Prow Robot
2025-01-28 13:33:22 -08:00
committed by GitHub
9 changed files with 30 additions and 12 deletions

View File

@@ -171,6 +171,7 @@ linters-settings: # please keep this alphabetized
contextual k8s.io/kubernetes/pkg/kubelet/clustertrustbundle/.*
contextual k8s.io/kubernetes/pkg/kubelet/token/.*
contextual k8s.io/kubernetes/pkg/kubelet/cadvisor/.*
contextual k8s.io/kubernetes/pkg/kubelet/oom/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -217,6 +217,7 @@ linters-settings: # please keep this alphabetized
contextual k8s.io/kubernetes/pkg/kubelet/clustertrustbundle/.*
contextual k8s.io/kubernetes/pkg/kubelet/token/.*
contextual k8s.io/kubernetes/pkg/kubelet/cadvisor/.*
contextual k8s.io/kubernetes/pkg/kubelet/oom/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -219,6 +219,7 @@ linters-settings: # please keep this alphabetized
contextual k8s.io/kubernetes/pkg/kubelet/clustertrustbundle/.*
contextual k8s.io/kubernetes/pkg/kubelet/token/.*
contextual k8s.io/kubernetes/pkg/kubelet/cadvisor/.*
contextual k8s.io/kubernetes/pkg/kubelet/oom/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -54,6 +54,7 @@ contextual k8s.io/kubernetes/pkg/kubelet/pleg/.*
contextual k8s.io/kubernetes/pkg/kubelet/clustertrustbundle/.*
contextual k8s.io/kubernetes/pkg/kubelet/token/.*
contextual k8s.io/kubernetes/pkg/kubelet/cadvisor/.*
contextual k8s.io/kubernetes/pkg/kubelet/oom/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -1575,7 +1575,7 @@ func (kl *Kubelet) StartGarbageCollection() {
// initializeModules will initialize internal modules that do not require the container runtime to be up.
// Note that the modules here must not depend on modules that are not initialized here.
func (kl *Kubelet) initializeModules() error {
func (kl *Kubelet) initializeModules(ctx context.Context) error {
// Prometheus metrics.
metrics.Register(
collectors.NewVolumeStatsCollector(kl),
@@ -1614,7 +1614,7 @@ func (kl *Kubelet) initializeModules() error {
// Start out of memory watcher.
if kl.oomWatcher != nil {
if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
if err := kl.oomWatcher.Start(ctx, kl.nodeRef); err != nil {
return fmt.Errorf("failed to start OOM watcher: %w", err)
}
}
@@ -1721,7 +1721,7 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
go kl.cloudResourceSyncManager.Run(wait.NeverStop)
}
if err := kl.initializeModules(); err != nil {
if err := kl.initializeModules(ctx); err != nil {
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.KubeletSetupFailed, err.Error())
klog.ErrorS(err, "Failed to initialize internal modules")
os.Exit(1)

View File

@@ -20,6 +20,7 @@ limitations under the License.
package oom
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
@@ -71,16 +72,17 @@ const (
)
// Start watches for system oom's and records an event for every system oom encountered.
func (ow *realWatcher) Start(ref *v1.ObjectReference) error {
func (ow *realWatcher) Start(ctx context.Context, ref *v1.ObjectReference) error {
outStream := make(chan *oomparser.OomInstance, 10)
go ow.oomStreamer.StreamOoms(outStream)
go func() {
logger := klog.FromContext(ctx)
defer runtime.HandleCrash()
for event := range outStream {
if event.VictimContainerName == recordEventContainerName {
klog.V(1).InfoS("Got sys oom event", "event", event)
logger.V(1).Info("Got sys oom event", "event", event)
eventMsg := "System OOM encountered"
if event.ProcessName != "" && event.Pid != 0 {
eventMsg = fmt.Sprintf("%s, victim process: %s, pid: %d", eventMsg, event.ProcessName, event.Pid)
@@ -88,7 +90,7 @@ func (ow *realWatcher) Start(ref *v1.ObjectReference) error {
ow.recorder.Eventf(ref, v1.EventTypeWarning, systemOOMEvent, eventMsg)
}
}
klog.ErrorS(nil, "Unexpectedly stopped receiving OOM notifications")
logger.Error(nil, "Unexpectedly stopped receiving OOM notifications")
}()
return nil
}

View File

@@ -23,9 +23,11 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/test/utils/ktesting"
"github.com/google/cadvisor/utils/oomparser"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type fakeStreamer struct {
@@ -41,6 +43,7 @@ func (fs *fakeStreamer) StreamOoms(outStream chan<- *oomparser.OomInstance) {
// TestWatcherRecordsEventsForOomEvents ensures that our OomInstances coming
// from `StreamOoms` are translated into events in our recorder.
func TestWatcherRecordsEventsForOomEvents(t *testing.T) {
tCtx := ktesting.Init(t)
oomInstancesToStream := []*oomparser.OomInstance{
{
Pid: 1000,
@@ -63,7 +66,7 @@ func TestWatcherRecordsEventsForOomEvents(t *testing.T) {
recorder: fakeRecorder,
oomStreamer: fakeStreamer,
}
assert.NoError(t, oomWatcher.Start(node))
require.NoError(t, oomWatcher.Start(tCtx, node))
eventsRecorded := getRecordedEvents(fakeRecorder, numExpectedOomEvents)
assert.Len(t, eventsRecorded, numExpectedOomEvents)
@@ -92,6 +95,7 @@ func getRecordedEvents(fakeRecorder *record.FakeRecorder, numExpectedOomEvents i
func TestWatcherRecordsEventsForOomEventsCorrectContainerName(t *testing.T) {
// By "incorrect" container name, we mean a container name for which we
// don't want to record an oom event.
tCtx := ktesting.Init(t)
numOomEventsWithIncorrectContainerName := 1
oomInstancesToStream := []*oomparser.OomInstance{
{
@@ -122,7 +126,7 @@ func TestWatcherRecordsEventsForOomEventsCorrectContainerName(t *testing.T) {
recorder: fakeRecorder,
oomStreamer: fakeStreamer,
}
assert.NoError(t, oomWatcher.Start(node))
require.NoError(t, oomWatcher.Start(tCtx, node))
eventsRecorded := getRecordedEvents(fakeRecorder, numExpectedOomEvents)
assert.Len(t, eventsRecorded, numExpectedOomEvents)
@@ -135,6 +139,8 @@ func TestWatcherRecordsEventsForOomEventsWithAdditionalInfo(t *testing.T) {
eventPid := 1000
processName := "fakeProcess"
tCtx := ktesting.Init(t)
oomInstancesToStream := []*oomparser.OomInstance{
{
Pid: eventPid,
@@ -157,7 +163,7 @@ func TestWatcherRecordsEventsForOomEventsWithAdditionalInfo(t *testing.T) {
recorder: fakeRecorder,
oomStreamer: fakeStreamer,
}
assert.NoError(t, oomWatcher.Start(node))
require.NoError(t, oomWatcher.Start(tCtx, node))
eventsRecorded := getRecordedEvents(fakeRecorder, numExpectedOomEvents)

View File

@@ -20,6 +20,8 @@ limitations under the License.
package oom
import (
"context"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
)
@@ -33,6 +35,6 @@ func NewWatcher(_ record.EventRecorder) (Watcher, error) {
return &oomWatcherUnsupported{}, nil
}
func (ow *oomWatcherUnsupported) Start(_ *v1.ObjectReference) error {
func (ow *oomWatcherUnsupported) Start(_ context.Context, _ *v1.ObjectReference) error {
return nil
}

View File

@@ -16,9 +16,13 @@ limitations under the License.
package oom
import v1 "k8s.io/api/core/v1"
import (
"context"
v1 "k8s.io/api/core/v1"
)
// Watcher defines the interface of OOM watchers.
type Watcher interface {
Start(ref *v1.ObjectReference) error
Start(ctx context.Context, ref *v1.ObjectReference) error
}