mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 11:21:47 +00:00
Adding events for system ooms.
Kubelet will continuously watch for system OOMs and generate events whenever it encounters a system OOM.
This commit is contained in:
parent
820d0f3e83
commit
020950fd0f
@ -36,6 +36,7 @@ import (
|
||||
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
"github.com/golang/glog"
|
||||
"github.com/golang/groupcache/lru"
|
||||
)
|
||||
|
@ -53,6 +53,7 @@ import (
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/version"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/volume"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
"github.com/golang/glog"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
)
|
||||
@ -214,6 +215,8 @@ func NewMainKubelet(
|
||||
|
||||
volumeManager := newVolumeManager()
|
||||
|
||||
oomWatcher := NewOOMWatcher(cadvisorInterface, recorder)
|
||||
|
||||
klet := &Kubelet{
|
||||
hostname: hostname,
|
||||
dockerClient: dockerClient,
|
||||
@ -243,6 +246,7 @@ func NewMainKubelet(
|
||||
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
|
||||
resourceContainer: resourceContainer,
|
||||
os: osInterface,
|
||||
oomWatcher: oomWatcher,
|
||||
}
|
||||
|
||||
klet.podManager = newBasicPodManager(klet.kubeClient)
|
||||
@ -388,12 +392,12 @@ type Kubelet struct {
|
||||
// status. Kubelet may fail to update node status reliablly if the value is too small,
|
||||
// as it takes time to gather all necessary node information.
|
||||
nodeStatusUpdateFrequency time.Duration
|
||||
|
||||
// The name of the resource-only container to run the Kubelet in (empty for no container).
|
||||
// Name must be absolute.
|
||||
resourceContainer string
|
||||
|
||||
os kubecontainer.OSInterface
|
||||
os kubecontainer.OSInterface
|
||||
oomWatcher OOMWatcher
|
||||
}
|
||||
|
||||
// getRootDir returns the full path to the directory under which kubelet can
|
||||
@ -583,10 +587,20 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) {
|
||||
}
|
||||
|
||||
go kl.syncNodeStatus()
|
||||
// Run the system oom watcher forever.
|
||||
go util.Until(kl.runOOMWatcher, time.Second, util.NeverStop)
|
||||
kl.statusManager.Start()
|
||||
kl.syncLoop(updates, kl)
|
||||
}
|
||||
|
||||
// Watches for system OOMs.
|
||||
func (kl *Kubelet) runOOMWatcher() {
|
||||
glog.V(5).Infof("Starting to record system OOMs")
|
||||
if err := kl.oomWatcher.RecordSysOOMs(kl.nodeRef); err != nil {
|
||||
glog.Errorf("failed to record system OOMs - %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// syncNodeStatus periodically synchronizes node status to master.
|
||||
func (kl *Kubelet) syncNodeStatus() {
|
||||
if kl.kubeClient == nil {
|
||||
@ -1809,6 +1823,8 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
|
||||
}
|
||||
oldNodeUnschedulable = node.Spec.Unschedulable
|
||||
}
|
||||
|
||||
// Update the current status on the API server
|
||||
_, err = kl.kubeClient.Nodes().UpdateStatus(node)
|
||||
return err
|
||||
}
|
||||
|
67
pkg/kubelet/oom_watcher.go
Normal file
67
pkg/kubelet/oom_watcher.go
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
Copyright 2015 Google Inc. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package kubelet
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||
"github.com/golang/glog"
|
||||
"github.com/google/cadvisor/events"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
)
|
||||
|
||||
type OOMWatcher interface {
|
||||
RecordSysOOMs(ref *api.ObjectReference) error
|
||||
}
|
||||
|
||||
type realOOMWatcher struct {
|
||||
cadvisor cadvisor.Interface
|
||||
recorder record.EventRecorder
|
||||
}
|
||||
|
||||
func NewOOMWatcher(cadvisor cadvisor.Interface, recorder record.EventRecorder) OOMWatcher {
|
||||
return &realOOMWatcher{
|
||||
cadvisor: cadvisor,
|
||||
recorder: recorder,
|
||||
}
|
||||
}
|
||||
|
||||
const systemOOMEvent = "SystemOOM"
|
||||
|
||||
// Watches cadvisor for system oom's and records an event for every system oom encountered.
|
||||
func (ow *realOOMWatcher) RecordSysOOMs(ref *api.ObjectReference) error {
|
||||
request := events.Request{
|
||||
EventType: map[cadvisorApi.EventType]bool{
|
||||
cadvisorApi.EventOom: true,
|
||||
},
|
||||
ContainerName: "/",
|
||||
IncludeSubcontainers: false,
|
||||
}
|
||||
eventChannel, err := ow.cadvisor.WatchEvents(&request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for event := range eventChannel.GetChannel() {
|
||||
glog.V(2).Infof("got sys oom event from cadvisor: %v", event)
|
||||
ow.recorder.PastEventf(ref, util.Time{event.Timestamp}, systemOOMEvent, "System OOM encountered")
|
||||
}
|
||||
return fmt.Errorf("failed to watch cadvisor for sys oom events")
|
||||
}
|
62
pkg/kubelet/oom_watcher_test.go
Normal file
62
pkg/kubelet/oom_watcher_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright 2015 Google Inc. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package kubelet
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
|
||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||
)
|
||||
|
||||
type fakeEvent struct {
|
||||
object runtime.Object
|
||||
timestamp util.Time
|
||||
reason string
|
||||
message string
|
||||
}
|
||||
|
||||
type fakeRecorder struct {
|
||||
events []fakeEvent
|
||||
}
|
||||
|
||||
func (f fakeRecorder) Event(object runtime.Object, reason, message string) {
|
||||
f.events = append(f.events, fakeEvent{object, util.Now(), reason, message})
|
||||
}
|
||||
|
||||
func (f fakeRecorder) Eventf(object runtime.Object, reason, messageFmt string, args ...interface{}) {
|
||||
f.events = append(f.events, fakeEvent{object, util.Now(), reason, fmt.Sprintf(messageFmt, args...)})
|
||||
}
|
||||
|
||||
func (f fakeRecorder) PastEventf(object runtime.Object, timestamp util.Time, reason, messageFmt string, args ...interface{}) {
|
||||
f.events = append(f.events, fakeEvent{object, timestamp, reason, fmt.Sprintf(messageFmt, args...)})
|
||||
}
|
||||
|
||||
func TestBasic(t *testing.T) {
|
||||
fakeRecorder := fakeRecorder{}
|
||||
mockCadvisor := &cadvisor.Fake{}
|
||||
node := &api.ObjectReference{}
|
||||
oomWatcher := NewOOMWatcher(mockCadvisor, fakeRecorder)
|
||||
go func() {
|
||||
oomWatcher.RecordSysOOMs(node)
|
||||
}()
|
||||
// TODO: Improve this test once cadvisor exports events.EventChannel as an interface
|
||||
// and thereby allow using a mock version of cadvisor.
|
||||
}
|
Loading…
Reference in New Issue
Block a user