Fix spam of multiattach errors in event logs

We should be careful while generating multiattach errors.
We were generating too many of them because the old code had
a minor bug: the MultiAttachErrorReported flag was set on a
local copy of the volume record returned by GetVolumesToAttach(),
so it was never persisted and the same event was emitted again
on every reconcile pass.
Hemant Kumar 2017-10-03 15:45:06 -04:00
parent 0ac7cb0c60
commit 67d4c40849
3 changed files with 76 additions and 42 deletions
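
Editor's note: the "minor bug" is a classic Go value-copy pitfall. The old
reconciler set MultiAttachErrorReported on the loop variable, which is a copy
of the record returned by GetVolumesToAttach(), so the flag never reached the
cache. A minimal standalone sketch of the pitfall, with hypothetical names
(not the controller's real types):

// Minimal sketch (hypothetical types): range yields a copy of each
// struct, so setting a flag on the loop variable never updates the
// backing store.
package main

import "fmt"

type volumeToAttach struct {
    multiAttachErrorReported bool
}

func main() {
    store := []volumeToAttach{{}}

    for _, v := range store {
        v.multiAttachErrorReported = true // mutates the copy only
    }
    fmt.Println(store[0].multiAttachErrorReported) // false: the write was lost

    // The fix routes the write through the owner of the data instead,
    // which is what SetMultiAttachError does for the desired state of world.
    store[0].multiAttachErrorReported = true
    fmt.Println(store[0].multiAttachErrorReported) // true
}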

pkg/controller/volume/attachdetach/cache/desired_state_of_world.go

@@ -101,6 +101,10 @@ type DesiredStateOfWorld interface {
 	// GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be
 	// mounted and attached for terminated pods
 	GetKeepTerminatedPodVolumesForNode(k8stypes.NodeName) bool
+
+	// Mark multiattach error as reported to prevent spamming multiple
+	// events for same error
+	SetMultiAttachError(v1.UniqueVolumeName, k8stypes.NodeName)
 }
 
 // VolumeToAttach represents a volume that should be attached to a node.
@@ -329,6 +333,21 @@ func (dsw *desiredStateOfWorld) VolumeExists(
 	return false
 }
 
+func (dsw *desiredStateOfWorld) SetMultiAttachError(
+	volumeName v1.UniqueVolumeName,
+	nodeName k8stypes.NodeName) {
+	dsw.RLock()
+	defer dsw.RUnlock()
+
+	nodeObj, nodeExists := dsw.nodesManaged[nodeName]
+	if nodeExists {
+		if volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName]; volumeExists {
+			volumeObj.multiAttachErrorReported = true
+			dsw.nodesManaged[nodeName].volumesToAttach[volumeName] = volumeObj
+		}
+	}
+}
+
 // GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be
 // mounted and attached for terminated pods
 func (dsw *desiredStateOfWorld) GetKeepTerminatedPodVolumesForNode(nodeName k8stypes.NodeName) bool {
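
Editor's note on the write-back line above: volumesToAttach stores struct
values, and Go map values are not addressable, so the entry has to be copied
out, mutated, and assigned back. A stripped-down sketch of the same pattern,
with hypothetical names:

// Hypothetical stand-in for the nested cache: node -> volume -> state.
type volumeState struct{ errorReported bool }

func markReported(nodes map[string]map[string]volumeState, node, vol string) {
    if vols, ok := nodes[node]; ok {
        if v, ok := vols[vol]; ok {
            // vols[vol].errorReported = true // would not compile:
            // map values are not addressable in Go.
            v.errorReported = true // mutate the copy...
            vols[vol] = v          // ...then store it back
        }
    }
}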

pkg/controller/volume/attachdetach/reconciler/reconciler.go

@@ -241,14 +241,24 @@ func (rc *reconciler) reconcile() {
 		}
 	}
 
+	rc.attachDesiredVolumes()
+
+	// Update Node Status
+	err := rc.nodeStatusUpdater.UpdateNodeStatuses()
+	if err != nil {
+		glog.Warningf("UpdateNodeStatuses failed with: %v", err)
+	}
+}
+
+func (rc *reconciler) attachDesiredVolumes() {
 	// Ensure volumes that should be attached are attached.
 	for _, volumeToAttach := range rc.desiredStateOfWorld.GetVolumesToAttach() {
-		if rc.actualStateOfWorld.VolumeNodeExists(
-			volumeToAttach.VolumeName, volumeToAttach.NodeName) {
+		if rc.actualStateOfWorld.VolumeNodeExists(volumeToAttach.VolumeName, volumeToAttach.NodeName) {
 			// Volume/Node exists, touch it to reset detachRequestedTime
 			glog.V(5).Infof(volumeToAttach.GenerateMsgDetailed("Volume attached--touching", ""))
 			rc.actualStateOfWorld.ResetDetachRequestTime(volumeToAttach.VolumeName, volumeToAttach.NodeName)
-		} else {
+			continue
+		}
 		// Don't even try to start an operation if there is already one running
 		if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "") {
 			glog.V(10).Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName)
@@ -263,7 +273,7 @@ func (rc *reconciler) reconcile() {
 			for _, pod := range volumeToAttach.ScheduledPods {
 				rc.recorder.Eventf(pod, v1.EventTypeWarning, kevents.FailedAttachVolume, simpleMsg)
 			}
-			volumeToAttach.MultiAttachErrorReported = true
+			rc.desiredStateOfWorld.SetMultiAttachError(volumeToAttach.VolumeName, volumeToAttach.NodeName)
 			glog.Warningf(detailedMsg)
 		}
 		continue
@@ -282,11 +292,5 @@ func (rc *reconciler) reconcile() {
 			glog.Errorf(volumeToAttach.GenerateErrorDetailed("attacherDetacher.AttachVolume failed to start", err).Error())
 		}
 	}
-	}
 
-	// Update Node Status
-	err := rc.nodeStatusUpdater.UpdateNodeStatuses()
-	if err != nil {
-		glog.Warningf("UpdateNodeStatuses failed with: %v", err)
-	}
 }
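
Editor's note: with the flag persisted, the next reconcile pass sees
MultiAttachErrorReported already set on the records returned by
GetVolumesToAttach(), and the guard just above this hunk (not shown in the
diff) skips the Eventf calls, so each multi-attach conflict is reported once
rather than once per pass. The report-once shape, reduced to a runnable toy
(names are illustrative, not the controller's):

package main

import "fmt"

// reportOnce emits a warning only the first time a volume/node conflict is
// seen; the seen map stands in for the flag persisted in the desired state.
func reportOnce(seen map[string]bool, volume, node string) {
    key := volume + "/" + node
    if seen[key] {
        return // already reported; stay quiet on later passes
    }
    seen[key] = true
    fmt.Printf("Multi-Attach error for volume %q on node %q\n", volume, node)
}

func main() {
    seen := map[string]bool{}
    for i := 0; i < 3; i++ { // three reconcile passes
        reportOnce(seen, "vol-1", "node-2")
    }
    // prints exactly one warning, not three
}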

pkg/controller/volume/attachdetach/reconciler/reconciler_test.go

@@ -455,6 +455,17 @@ func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteOnce(t *testing.T) {
 	nodesForVolume := asw.GetNodesForVolume(generatedVolumeName)
 
+	// check if multiattach is marked
+	// at least one volume should be marked with multiattach error
+	nodeAttachedTo := nodesForVolume[0]
+	for _, volumeToAttach := range dsw.GetVolumesToAttach() {
+		if volumeToAttach.NodeName != nodeAttachedTo {
+			if !volumeToAttach.MultiAttachErrorReported {
+				t.Fatalf("Expected volume %q on node %q to have multiattach error", volumeToAttach.VolumeName, volumeToAttach.NodeName)
+			}
+		}
+	}
+
 	// Act
 	podToDelete := ""
 	if nodesForVolume[0] == nodeName1 {