Make log-monitor give up on trying to ssh to a dead node after some time

This commit is contained in:
gmarek 2017-05-30 10:27:10 +02:00
parent 728a7838c5
commit 0cc1999e16

View File

@@ -249,9 +249,13 @@ func (g *LogSizeGatherer) Work() bool {
 	)
 	if err != nil {
 		Logf("Error while trying to SSH to %v, skipping probe. Error: %v", workItem.ip, err)
-		if workItem.backoffMultiplier < 128 {
-			workItem.backoffMultiplier *= 2
-		}
+		// In case of repeated error give up.
+		if workItem.backoffMultiplier >= 128 {
+			Logf("Failed to ssh to a node %v multiple times in a row. Giving up.", workItem.ip)
+			g.wg.Done()
+			return false
+		}
+		workItem.backoffMultiplier *= 2
 		go g.pushWorkItem(workItem)
 		return true
 	}