runtime: use exponential backoff for process wait

The initial wait period between checks is 1ms; each subsequent
wait period is min(previous_wait_period*5, 50ms).

Signed-off-by: Alexandru Matei <alexandru.matei@uipath.com>
This commit is contained in:
Alexandru Matei 2022-12-05 11:42:47 +02:00
parent 71491a69c3
commit e9ba0c11d0

View File

@ -25,6 +25,8 @@ const cpBinaryName = "cp"
const fileMode0755 = os.FileMode(0755) const fileMode0755 = os.FileMode(0755)
// maxWaitDelay is the upper bound on the backoff delay between successive
// process checks in waitProcessUsingWaitLoop.
const maxWaitDelay = 50 * time.Millisecond
// The DefaultRateLimiterRefillTime is used for calculating the rate at // The DefaultRateLimiterRefillTime is used for calculating the rate at
// which a TokenBucket is replenished, in cases where a RateLimiter is // which a TokenBucket is replenished, in cases where a RateLimiter is
// applied to either network or disk I/O. // applied to either network or disk I/O.
@ -307,11 +309,18 @@ func ConvertAddressFamily(family int32) pbTypes.IPFamily {
} }
func waitProcessUsingWaitLoop(pid int, timeoutSecs uint, logger *logrus.Entry) bool { func waitProcessUsingWaitLoop(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
secs := time.Duration(timeoutSecs) secs := time.Duration(timeoutSecs) * time.Second
timeout := time.After(secs * time.Second) timeout := time.After(secs)
delay := 1 * time.Millisecond
for { for {
// Check if the process is running periodically to avoid a busy loop // Wait4 is used to reap and check that a child terminated.
// Without the Wait4 call, Kill(0) for a child will always exit without
// error because the process isn't reaped.
// Wait4 returns an ECHILD error for non-child processes. Kill(0) is meant
// to address this case: once the process is reaped by the init process,
// the call will return an ESRCH error.
// "A watched pot never boils" and an unwaited-for process never appears to die! // "A watched pot never boils" and an unwaited-for process never appears to die!
waitedPid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil) waitedPid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil)
@ -324,7 +333,12 @@ func waitProcessUsingWaitLoop(pid int, timeoutSecs uint, logger *logrus.Entry) b
} }
select { select {
case <-time.After(50 * time.Millisecond): case <-time.After(delay):
delay = delay * 5
if delay > maxWaitDelay {
delay = maxWaitDelay
}
case <-timeout: case <-timeout:
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs) logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
return true return true