mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Adding events for system ooms.
Kubelet will continuously watch for system OOMs and generate events whenever it encounters a system OOM.
This commit is contained in:
parent
820d0f3e83
commit
020950fd0f
@ -36,6 +36,7 @@ import (
|
|||||||
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
|
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||||
|
docker "github.com/fsouza/go-dockerclient"
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
"github.com/golang/groupcache/lru"
|
"github.com/golang/groupcache/lru"
|
||||||
)
|
)
|
||||||
|
@ -53,6 +53,7 @@ import (
|
|||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/version"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/version"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/volume"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/volume"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
|
||||||
|
docker "github.com/fsouza/go-dockerclient"
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||||
)
|
)
|
||||||
@ -214,6 +215,8 @@ func NewMainKubelet(
|
|||||||
|
|
||||||
volumeManager := newVolumeManager()
|
volumeManager := newVolumeManager()
|
||||||
|
|
||||||
|
oomWatcher := NewOOMWatcher(cadvisorInterface, recorder)
|
||||||
|
|
||||||
klet := &Kubelet{
|
klet := &Kubelet{
|
||||||
hostname: hostname,
|
hostname: hostname,
|
||||||
dockerClient: dockerClient,
|
dockerClient: dockerClient,
|
||||||
@ -243,6 +246,7 @@ func NewMainKubelet(
|
|||||||
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
|
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
|
||||||
resourceContainer: resourceContainer,
|
resourceContainer: resourceContainer,
|
||||||
os: osInterface,
|
os: osInterface,
|
||||||
|
oomWatcher: oomWatcher,
|
||||||
}
|
}
|
||||||
|
|
||||||
klet.podManager = newBasicPodManager(klet.kubeClient)
|
klet.podManager = newBasicPodManager(klet.kubeClient)
|
||||||
@ -388,12 +392,12 @@ type Kubelet struct {
|
|||||||
// status. Kubelet may fail to update node status reliablly if the value is too small,
|
// status. Kubelet may fail to update node status reliablly if the value is too small,
|
||||||
// as it takes time to gather all necessary node information.
|
// as it takes time to gather all necessary node information.
|
||||||
nodeStatusUpdateFrequency time.Duration
|
nodeStatusUpdateFrequency time.Duration
|
||||||
|
|
||||||
// The name of the resource-only container to run the Kubelet in (empty for no container).
|
// The name of the resource-only container to run the Kubelet in (empty for no container).
|
||||||
// Name must be absolute.
|
// Name must be absolute.
|
||||||
resourceContainer string
|
resourceContainer string
|
||||||
|
|
||||||
os kubecontainer.OSInterface
|
os kubecontainer.OSInterface
|
||||||
|
oomWatcher OOMWatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
// getRootDir returns the full path to the directory under which kubelet can
|
// getRootDir returns the full path to the directory under which kubelet can
|
||||||
@ -583,10 +587,20 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
go kl.syncNodeStatus()
|
go kl.syncNodeStatus()
|
||||||
|
// Run the system oom watcher forever.
|
||||||
|
go util.Until(kl.runOOMWatcher, time.Second, util.NeverStop)
|
||||||
kl.statusManager.Start()
|
kl.statusManager.Start()
|
||||||
kl.syncLoop(updates, kl)
|
kl.syncLoop(updates, kl)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Watches for system OOMs.
|
||||||
|
func (kl *Kubelet) runOOMWatcher() {
|
||||||
|
glog.V(5).Infof("Starting to record system OOMs")
|
||||||
|
if err := kl.oomWatcher.RecordSysOOMs(kl.nodeRef); err != nil {
|
||||||
|
glog.Errorf("failed to record system OOMs - %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// syncNodeStatus periodically synchronizes node status to master.
|
// syncNodeStatus periodically synchronizes node status to master.
|
||||||
func (kl *Kubelet) syncNodeStatus() {
|
func (kl *Kubelet) syncNodeStatus() {
|
||||||
if kl.kubeClient == nil {
|
if kl.kubeClient == nil {
|
||||||
@ -1809,6 +1823,8 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
|
|||||||
}
|
}
|
||||||
oldNodeUnschedulable = node.Spec.Unschedulable
|
oldNodeUnschedulable = node.Spec.Unschedulable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the current status on the API server
|
||||||
_, err = kl.kubeClient.Nodes().UpdateStatus(node)
|
_, err = kl.kubeClient.Nodes().UpdateStatus(node)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
67
pkg/kubelet/oom_watcher.go
Normal file
67
pkg/kubelet/oom_watcher.go
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2015 Google Inc. All rights reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package kubelet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||||
|
"github.com/golang/glog"
|
||||||
|
"github.com/google/cadvisor/events"
|
||||||
|
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
type OOMWatcher interface {
|
||||||
|
RecordSysOOMs(ref *api.ObjectReference) error
|
||||||
|
}
|
||||||
|
|
||||||
|
type realOOMWatcher struct {
|
||||||
|
cadvisor cadvisor.Interface
|
||||||
|
recorder record.EventRecorder
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewOOMWatcher(cadvisor cadvisor.Interface, recorder record.EventRecorder) OOMWatcher {
|
||||||
|
return &realOOMWatcher{
|
||||||
|
cadvisor: cadvisor,
|
||||||
|
recorder: recorder,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const systemOOMEvent = "SystemOOM"
|
||||||
|
|
||||||
|
// Watches cadvisor for system oom's and records an event for every system oom encountered.
|
||||||
|
func (ow *realOOMWatcher) RecordSysOOMs(ref *api.ObjectReference) error {
|
||||||
|
request := events.Request{
|
||||||
|
EventType: map[cadvisorApi.EventType]bool{
|
||||||
|
cadvisorApi.EventOom: true,
|
||||||
|
},
|
||||||
|
ContainerName: "/",
|
||||||
|
IncludeSubcontainers: false,
|
||||||
|
}
|
||||||
|
eventChannel, err := ow.cadvisor.WatchEvents(&request)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for event := range eventChannel.GetChannel() {
|
||||||
|
glog.V(2).Infof("got sys oom event from cadvisor: %v", event)
|
||||||
|
ow.recorder.PastEventf(ref, util.Time{event.Timestamp}, systemOOMEvent, "System OOM encountered")
|
||||||
|
}
|
||||||
|
return fmt.Errorf("failed to watch cadvisor for sys oom events")
|
||||||
|
}
|
62
pkg/kubelet/oom_watcher_test.go
Normal file
62
pkg/kubelet/oom_watcher_test.go
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2015 Google Inc. All rights reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package kubelet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
type fakeEvent struct {
|
||||||
|
object runtime.Object
|
||||||
|
timestamp util.Time
|
||||||
|
reason string
|
||||||
|
message string
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeRecorder struct {
|
||||||
|
events []fakeEvent
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeRecorder) Event(object runtime.Object, reason, message string) {
|
||||||
|
f.events = append(f.events, fakeEvent{object, util.Now(), reason, message})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeRecorder) Eventf(object runtime.Object, reason, messageFmt string, args ...interface{}) {
|
||||||
|
f.events = append(f.events, fakeEvent{object, util.Now(), reason, fmt.Sprintf(messageFmt, args...)})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeRecorder) PastEventf(object runtime.Object, timestamp util.Time, reason, messageFmt string, args ...interface{}) {
|
||||||
|
f.events = append(f.events, fakeEvent{object, timestamp, reason, fmt.Sprintf(messageFmt, args...)})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBasic(t *testing.T) {
|
||||||
|
fakeRecorder := fakeRecorder{}
|
||||||
|
mockCadvisor := &cadvisor.Fake{}
|
||||||
|
node := &api.ObjectReference{}
|
||||||
|
oomWatcher := NewOOMWatcher(mockCadvisor, fakeRecorder)
|
||||||
|
go func() {
|
||||||
|
oomWatcher.RecordSysOOMs(node)
|
||||||
|
}()
|
||||||
|
// TODO: Improve this test once cadvisor exports events.EventChannel as an interface
|
||||||
|
// and thereby allow using a mock version of cadvisor.
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user