Make log-monitor give up on trying to SSH to a dead node after some time

This commit is contained in:
gmarek 2017-05-30 10:27:10 +02:00
parent 728a7838c5
commit 0cc1999e16

View File

@@ -249,9 +249,13 @@ func (g *LogSizeGatherer) Work() bool {
)
if err != nil {
Logf("Error while trying to SSH to %v, skipping probe. Error: %v", workItem.ip, err)
if workItem.backoffMultiplier < 128 {
workItem.backoffMultiplier *= 2
// In case of repeated error give up.
if workItem.backoffMultiplier >= 128 {
Logf("Failed to ssh to a node %v multiple times in a row. Giving up.", workItem.ip)
g.wg.Done()
return false
}
workItem.backoffMultiplier *= 2
go g.pushWorkItem(workItem)
return true
}