Merge pull request #8430 from vmarmol/logging

Don't run OOM watcher is cAdvisor is not available.
This commit is contained in:
Vish Kannan 2015-05-18 14:41:24 -07:00
commit c5f7ee6f96
3 changed files with 24 additions and 21 deletions

View File

@ -675,22 +675,19 @@ func (kl *Kubelet) Run(updates <-chan PodUpdate) {
glog.Errorf("Failed to start ContainerManager, system may not be properly isolated: %v", err)
}
err = kl.oomWatcher.Start(kl.nodeRef)
if err != nil {
kl.recorder.Eventf(kl.nodeRef, "kubeletSetupFailed", "Failed to start OOM watcher %v", err)
glog.Errorf("Failed to start OOM watching: %v", err)
}
go util.Until(kl.updateRuntimeUp, 5*time.Second, util.NeverStop)
go kl.syncNodeStatus()
// Run the system oom watcher forever.
go util.Until(kl.runOOMWatcher, time.Second, util.NeverStop)
kl.statusManager.Start()
kl.syncLoop(updates, kl)
}
// Watches for system OOMs.
func (kl *Kubelet) runOOMWatcher() {
glog.V(5).Infof("Starting to record system OOMs")
if err := kl.oomWatcher.RecordSysOOMs(kl.nodeRef); err != nil {
glog.Errorf("failed to record system OOMs - %v", err)
}
}
// syncNodeStatus periodically synchronizes node status to master.
func (kl *Kubelet) syncNodeStatus() {
if kl.kubeClient == nil {

View File

@ -17,8 +17,6 @@ limitations under the License.
package kubelet
import (
"fmt"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
@ -29,7 +27,7 @@ import (
)
type OOMWatcher interface {
RecordSysOOMs(ref *api.ObjectReference) error
Start(ref *api.ObjectReference) error
}
type realOOMWatcher struct {
@ -47,7 +45,7 @@ func NewOOMWatcher(cadvisor cadvisor.Interface, recorder record.EventRecorder) O
const systemOOMEvent = "SystemOOM"
// Watches cadvisor for system oom's and records an event for every system oom encountered.
func (ow *realOOMWatcher) RecordSysOOMs(ref *api.ObjectReference) error {
func (ow *realOOMWatcher) Start(ref *api.ObjectReference) error {
request := events.Request{
EventType: map[cadvisorApi.EventType]bool{
cadvisorApi.EventOom: true,
@ -59,9 +57,15 @@ func (ow *realOOMWatcher) RecordSysOOMs(ref *api.ObjectReference) error {
if err != nil {
return err
}
for event := range eventChannel.GetChannel() {
glog.V(2).Infof("got sys oom event from cadvisor: %v", event)
ow.recorder.PastEventf(ref, util.Time{event.Timestamp}, systemOOMEvent, "System OOM encountered")
}
return fmt.Errorf("failed to watch cadvisor for sys oom events")
go func() {
defer util.HandleCrash()
for event := range eventChannel.GetChannel() {
glog.V(2).Infof("Got sys oom event from cadvisor: %v", event)
ow.recorder.PastEventf(ref, util.Time{event.Timestamp}, systemOOMEvent, "System OOM encountered")
}
glog.Errorf("Unexpectedly stopped receiving OOM notifications from cAdvisor")
}()
return nil
}

View File

@ -54,9 +54,11 @@ func TestBasic(t *testing.T) {
mockCadvisor := &cadvisor.Fake{}
node := &api.ObjectReference{}
oomWatcher := NewOOMWatcher(mockCadvisor, fakeRecorder)
go func() {
oomWatcher.RecordSysOOMs(node)
}()
err := oomWatcher.Start(node)
if err != nil {
t.Errorf("Should not have failed: %v", err)
}
// TODO: Improve this test once cadvisor exports events.EventChannel as an interface
// and thereby allow using a mock version of cadvisor.
}