From 67d4c408492593041f2327e5677edd896660db87 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 3 Oct 2017 15:45:06 -0400 Subject: [PATCH] Fix spam of multiattach errors in event logs We should be careful while generating multiattach errors. We seem to be generating too many of them because old code had minor bug. --- .../cache/desired_state_of_world.go | 19 ++++ .../attachdetach/reconciler/reconciler.go | 88 ++++++++++--------- .../reconciler/reconciler_test.go | 11 +++ 3 files changed, 76 insertions(+), 42 deletions(-) diff --git a/pkg/controller/volume/attachdetach/cache/desired_state_of_world.go b/pkg/controller/volume/attachdetach/cache/desired_state_of_world.go index f4481f71618..affd6229a72 100644 --- a/pkg/controller/volume/attachdetach/cache/desired_state_of_world.go +++ b/pkg/controller/volume/attachdetach/cache/desired_state_of_world.go @@ -101,6 +101,10 @@ type DesiredStateOfWorld interface { // GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be // mounted and attached for terminated pods GetKeepTerminatedPodVolumesForNode(k8stypes.NodeName) bool + + // Mark multiattach error as reported to prevent spamming multiple + // events for same error + SetMultiAttachError(v1.UniqueVolumeName, k8stypes.NodeName) } // VolumeToAttach represents a volume that should be attached to a node. @@ -329,6 +333,21 @@ func (dsw *desiredStateOfWorld) VolumeExists( return false } +func (dsw *desiredStateOfWorld) SetMultiAttachError( + volumeName v1.UniqueVolumeName, + nodeName k8stypes.NodeName) { + dsw.RLock() + defer dsw.RUnlock() + + nodeObj, nodeExists := dsw.nodesManaged[nodeName] + if nodeExists { + if volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName]; volumeExists { + volumeObj.multiAttachErrorReported = true + dsw.nodesManaged[nodeName].volumesToAttach[volumeName] = volumeObj + } + } +} + // GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be // mounted and attached for terminated pods func (dsw *desiredStateOfWorld) GetKeepTerminatedPodVolumesForNode(nodeName k8stypes.NodeName) bool { diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler.go b/pkg/controller/volume/attachdetach/reconciler/reconciler.go index 26133bb3bcd..ac81f98c6ae 100644 --- a/pkg/controller/volume/attachdetach/reconciler/reconciler.go +++ b/pkg/controller/volume/attachdetach/reconciler/reconciler.go @@ -241,48 +241,7 @@ func (rc *reconciler) reconcile() { } } - // Ensure volumes that should be attached are attached. - for _, volumeToAttach := range rc.desiredStateOfWorld.GetVolumesToAttach() { - if rc.actualStateOfWorld.VolumeNodeExists( - volumeToAttach.VolumeName, volumeToAttach.NodeName) { - // Volume/Node exists, touch it to reset detachRequestedTime - glog.V(5).Infof(volumeToAttach.GenerateMsgDetailed("Volume attached--touching", "")) - rc.actualStateOfWorld.ResetDetachRequestTime(volumeToAttach.VolumeName, volumeToAttach.NodeName) - } else { - // Don't even try to start an operation if there is already one running - if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "") { - glog.V(10).Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName) - continue - } - - if rc.isMultiAttachForbidden(volumeToAttach.VolumeSpec) { - nodes := rc.actualStateOfWorld.GetNodesForVolume(volumeToAttach.VolumeName) - if len(nodes) > 0 { - if !volumeToAttach.MultiAttachErrorReported { - simpleMsg, detailedMsg := volumeToAttach.GenerateMsg("Multi-Attach error", "Volume is already exclusively attached to one node and can't be attached to another") - for _, pod := range volumeToAttach.ScheduledPods { - rc.recorder.Eventf(pod, v1.EventTypeWarning, kevents.FailedAttachVolume, simpleMsg) - } - volumeToAttach.MultiAttachErrorReported = true - glog.Warningf(detailedMsg) - } - continue - } - } - - // Volume/Node doesn't exist, spawn a goroutine to attach it - glog.V(5).Infof(volumeToAttach.GenerateMsgDetailed("Starting attacherDetacher.AttachVolume", "")) - err := rc.attacherDetacher.AttachVolume(volumeToAttach.VolumeToAttach, rc.actualStateOfWorld) - if err == nil { - glog.Infof(volumeToAttach.GenerateMsgDetailed("attacherDetacher.AttachVolume started", "")) - } - if err != nil && !exponentialbackoff.IsExponentialBackoff(err) { - // Ignore exponentialbackoff.IsExponentialBackoff errors, they are expected. - // Log all other errors. - glog.Errorf(volumeToAttach.GenerateErrorDetailed("attacherDetacher.AttachVolume failed to start", err).Error()) - } - } - } + rc.attachDesiredVolumes() // Update Node Status err := rc.nodeStatusUpdater.UpdateNodeStatuses() @@ -290,3 +249,48 @@ func (rc *reconciler) reconcile() { glog.Warningf("UpdateNodeStatuses failed with: %v", err) } } + +func (rc *reconciler) attachDesiredVolumes() { + // Ensure volumes that should be attached are attached. + for _, volumeToAttach := range rc.desiredStateOfWorld.GetVolumesToAttach() { + if rc.actualStateOfWorld.VolumeNodeExists(volumeToAttach.VolumeName, volumeToAttach.NodeName) { + // Volume/Node exists, touch it to reset detachRequestedTime + glog.V(5).Infof(volumeToAttach.GenerateMsgDetailed("Volume attached--touching", "")) + rc.actualStateOfWorld.ResetDetachRequestTime(volumeToAttach.VolumeName, volumeToAttach.NodeName) + continue + } + // Don't even try to start an operation if there is already one running + if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "") { + glog.V(10).Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName) + continue + } + + if rc.isMultiAttachForbidden(volumeToAttach.VolumeSpec) { + nodes := rc.actualStateOfWorld.GetNodesForVolume(volumeToAttach.VolumeName) + if len(nodes) > 0 { + if !volumeToAttach.MultiAttachErrorReported { + simpleMsg, detailedMsg := volumeToAttach.GenerateMsg("Multi-Attach error", "Volume is already exclusively attached to one node and can't be attached to another") + for _, pod := range volumeToAttach.ScheduledPods { + rc.recorder.Eventf(pod, v1.EventTypeWarning, kevents.FailedAttachVolume, simpleMsg) + } + rc.desiredStateOfWorld.SetMultiAttachError(volumeToAttach.VolumeName, volumeToAttach.NodeName) + glog.Warningf(detailedMsg) + } + continue + } + } + + // Volume/Node doesn't exist, spawn a goroutine to attach it + glog.V(5).Infof(volumeToAttach.GenerateMsgDetailed("Starting attacherDetacher.AttachVolume", "")) + err := rc.attacherDetacher.AttachVolume(volumeToAttach.VolumeToAttach, rc.actualStateOfWorld) + if err == nil { + glog.Infof(volumeToAttach.GenerateMsgDetailed("attacherDetacher.AttachVolume started", "")) + } + if err != nil && !exponentialbackoff.IsExponentialBackoff(err) { + // Ignore exponentialbackoff.IsExponentialBackoff errors, they are expected. + // Log all other errors. + glog.Errorf(volumeToAttach.GenerateErrorDetailed("attacherDetacher.AttachVolume failed to start", err).Error()) + } + } + +} diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go index 7792174b2fb..32a6c974095 100644 --- a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go +++ b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go @@ -455,6 +455,17 @@ func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteOnce(t *testing. nodesForVolume := asw.GetNodesForVolume(generatedVolumeName) + // check if multiattach is marked + // at least one volume should be marked with multiattach error + nodeAttachedTo := nodesForVolume[0] + for _, volumeToAttach := range dsw.GetVolumesToAttach() { + if volumeToAttach.NodeName != nodeAttachedTo { + if !volumeToAttach.MultiAttachErrorReported { + t.Fatalf("Expected volume %q on node %q to have multiattach error", volumeToAttach.VolumeName, volumeToAttach.NodeName) + } + } + } + // Act podToDelete := "" if nodesForVolume[0] == nodeName1 {