Improve debuggability of leaking goroutines

This commit is contained in:
Wojciech Tyczyński 2022-04-04 14:20:41 +02:00
parent 71d87272de
commit 1b976760db

View File

@@ -194,11 +194,13 @@ func EtcdMain(tests func() int) {
stop() // Don't defer this. See os.Exit documentation. stop() // Don't defer this. See os.Exit documentation.
checkNumberOfGoroutines := func() (bool, error) { checkNumberOfGoroutines := func() (bool, error) {
// Leave some room for goroutines we can not get rid of // We leave some room for leaked goroutines as there are
// like k8s.io/klog/v2.(*loggingT).flushDaemon() // still some leaks, mostly:
// TODO(#108483): Figure out if we can reduce this // - leak from lumberjack package we're vendoring
// further (ideally down to zero). // - leak from apiserver healthz
if dg := runtime.NumGoroutine() - before; dg <= 4 { // - leak from opencensus library
// Once fixed, we should be able to bring it down to zero.
if dg := runtime.NumGoroutine() - before; dg <= 3 {
return true, nil return true, nil
} }
// Allow goroutines to schedule and die off. // Allow goroutines to schedule and die off.
@@ -210,7 +212,9 @@ func EtcdMain(tests func() int) {
// But we keep the limit higher to account for cpu-starved environments. // But we keep the limit higher to account for cpu-starved environments.
if err := wait.Poll(100*time.Millisecond, 5*time.Second, checkNumberOfGoroutines); err != nil { if err := wait.Poll(100*time.Millisecond, 5*time.Second, checkNumberOfGoroutines); err != nil {
after := runtime.NumGoroutine() after := runtime.NumGoroutine()
klog.Fatalf("unexpected number of goroutines: before: %d after %d", before, after) stacktraces := make([]byte, 1<<20)
runtime.Stack(stacktraces, true)
klog.Fatalf("unexpected number of goroutines: before: %d after %d\n%sd", before, after, string(stacktraces))
} }
os.Exit(result) os.Exit(result)
} }