From c008732948907db3417f99fd8726a0c6c645fd93 Mon Sep 17 00:00:00 2001
From: Patrick Ohly <patrick.ohly@intel.com>
Date: Tue, 31 Jan 2023 10:22:13 +0100
Subject: [PATCH] test/integration: add StartEtcd

In contrast to EtcdMain, it can be called by individual tests or benchmarks and
each caller will get a fresh etcd instance. However, it uses the same
underlying code and the same port for all instances, so tests cannot run in
parallel.
---
 test/integration/apiserver/watchcache_test.go |  4 +-
 test/integration/framework/etcd.go            | 55 +++++++++++--------
 test/integration/framework/goleak.go          | 34 ++++++++++++
 3 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/test/integration/apiserver/watchcache_test.go b/test/integration/apiserver/watchcache_test.go
index b5abfb368eb..ae0b1aa6ec5 100644
--- a/test/integration/apiserver/watchcache_test.go
+++ b/test/integration/apiserver/watchcache_test.go
@@ -37,12 +37,12 @@ import (
 // with one of them containing events and the other all other objects.
 func multiEtcdSetup(t *testing.T) (clientset.Interface, framework.TearDownFunc) {
 	etcdArgs := []string{"--experimental-watch-progress-notify-interval", "1s"}
-	etcd0URL, stopEtcd0, err := framework.RunCustomEtcd("etcd_watchcache0", etcdArgs)
+	etcd0URL, stopEtcd0, err := framework.RunCustomEtcd("etcd_watchcache0", etcdArgs, nil)
 	if err != nil {
 		t.Fatalf("Couldn't start etcd: %v", err)
 	}
 
-	etcd1URL, stopEtcd1, err := framework.RunCustomEtcd("etcd_watchcache1", etcdArgs)
+	etcd1URL, stopEtcd1, err := framework.RunCustomEtcd("etcd_watchcache1", etcdArgs, nil)
 	if err != nil {
 		t.Fatalf("Couldn't start etcd: %v", err)
 	}
diff --git a/test/integration/framework/etcd.go b/test/integration/framework/etcd.go
index e0fcff1007c..a7a502da4ca 100644
--- a/test/integration/framework/etcd.go
+++ b/test/integration/framework/etcd.go
@@ -26,6 +26,7 @@ import (
 	"os/exec"
 	"strings"
 	"syscall"
+	"testing"
 	"time"
 
 	"go.uber.org/goleak"
@@ -62,7 +63,7 @@ func getAvailablePort() (int, error) {
 
 // startEtcd executes an etcd instance. The returned function will signal the
 // etcd process and wait for it to exit.
-func startEtcd() (func(), error) {
+func startEtcd(output io.Writer) (func(), error) {
 	etcdURL := env.GetEnvAsStringOrFallback("KUBE_INTEGRATION_ETCD_URL", "http://127.0.0.1:2379")
 	conn, err := net.Dial("tcp", strings.TrimPrefix(etcdURL, "http://"))
 	if err == nil {
@@ -72,7 +73,7 @@ func startEtcd() (func(), error) {
 	}
 	klog.V(1).Infof("could not connect to etcd: %v", err)
 
-	currentURL, stop, err := RunCustomEtcd("integration_test_etcd_data", nil)
+	currentURL, stop, err := RunCustomEtcd("integration_test_etcd_data", nil, output)
 	if err != nil {
 		return nil, err
 	}
@@ -83,7 +84,7 @@ func startEtcd() (func(), error) {
 }
 
 // RunCustomEtcd starts a custom etcd instance for test purposes.
-func RunCustomEtcd(dataDir string, customFlags []string) (url string, stopFn func(), err error) {
+func RunCustomEtcd(dataDir string, customFlags []string, output io.Writer) (url string, stopFn func(), err error) {
 	// TODO: Check for valid etcd version.
 	etcdPath, err := getEtcdPath()
 	if err != nil {
@@ -119,8 +120,13 @@ func RunCustomEtcd(dataDir string, customFlags []string) (url string, stopFn fun
 	}
 	args = append(args, customFlags...)
 	cmd := exec.CommandContext(ctx, etcdPath, args...)
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
+	if output == nil {
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+	} else {
+		cmd.Stdout = output
+		cmd.Stderr = output
+	}
 	stop := func() {
 		// try to exit etcd gracefully
 		defer cancel()
@@ -194,7 +200,7 @@ func EtcdMain(tests func() int) {
 		goleak.IgnoreTopFunction("gopkg.in/natefinch/lumberjack%2ev2.(*Logger).millRun"),
 	)
 
-	stop, err := startEtcd()
+	stop, err := startEtcd(nil)
 	if err != nil {
 		klog.Fatalf("cannot run integration tests: unable to start etcd: %v", err)
 	}
@@ -202,27 +208,32 @@ func EtcdMain(tests func() int) {
 	stop() // Don't defer this. See os.Exit documentation.
 	klog.StopFlushDaemon()
 
-	// Several tests don't wait for goroutines to stop. goleak.Find retries
-	// internally, but not long enough. 5 seconds seemed to be enough for
-	// most tests, even when testing in the CI.
-	timeout := 5 * time.Second
-	start := time.Now()
-	for {
-		err := goleak.Find(goleakOpts...)
-		if err == nil {
-			break
-		}
-		if time.Now().Sub(start) >= timeout {
-			klog.ErrorS(err, "EtcdMain goroutine check")
-			result = 1
-			break
-		}
+	if err := goleakFindRetry(goleakOpts...); err != nil {
+		klog.ErrorS(err, "EtcdMain goroutine check")
+		result = 1
 	}
 
 	os.Exit(result)
 }
 
-// GetEtcdURL returns the URL of the etcd instance started by EtcdMain.
+// GetEtcdURL returns the URL of the etcd instance started by EtcdMain or StartEtcd.
 func GetEtcdURL() string {
 	return env.GetEnvAsStringOrFallback("KUBE_INTEGRATION_ETCD_URL", "http://127.0.0.1:2379")
 }
+
+// StartEtcd starts an etcd instance inside a test. It will abort the test if
+// startup fails and clean up after the test automatically. Stdout and stderr
+// of the etcd binary go to the provided writer.
+//
+// In contrast to EtcdMain, StartEtcd will not do automatic leak checking.
+// Tests can decide if and where they want to do that.
+//
+// Starting etcd multiple times per test run instead of once with EtcdMain
+// provides better separation between different tests.
+func StartEtcd(tb testing.TB, etcdOutput io.Writer) {
+	stop, err := startEtcd(etcdOutput)
+	if err != nil {
+		tb.Fatalf("unable to start etcd: %v", err)
+	}
+	tb.Cleanup(stop)
+}
diff --git a/test/integration/framework/goleak.go b/test/integration/framework/goleak.go
index 5158fff0a37..0790cfd33ab 100644
--- a/test/integration/framework/goleak.go
+++ b/test/integration/framework/goleak.go
@@ -17,6 +17,9 @@ limitations under the License.
 package framework
 
 import (
+	"testing"
+	"time"
+
 	"go.uber.org/goleak"
 	"k8s.io/apiserver/pkg/server/healthz"
 )
@@ -34,3 +37,34 @@ func IgnoreBackgroundGoroutines() []goleak.Option {
 
 	return []goleak.Option{goleak.IgnoreCurrent()}
 }
+
+// GoleakCheck sets up leak checking for a test or benchmark.
+// The check runs as cleanup operation and records an
+// error when goroutines were leaked.
+func GoleakCheck(tb testing.TB, opts ...goleak.Option) {
+	// Must be called *before* creating new goroutines.
+	opts = append(opts, IgnoreBackgroundGoroutines()...)
+
+	tb.Cleanup(func() {
+		if err := goleakFindRetry(opts...); err != nil {
+			tb.Error(err.Error())
+		}
+	})
+}
+
+func goleakFindRetry(opts ...goleak.Option) error {
+	// Several tests don't wait for goroutines to stop. goleak.Find retries
+	// internally, but not long enough. 5 seconds seemed to be enough for
+	// most tests, even when testing in the CI.
+	timeout := 5 * time.Second
+	start := time.Now()
+	for {
+		err := goleak.Find(opts...)
+		if err == nil {
+			return nil
+		}
+		if time.Now().Sub(start) >= timeout {
+			return err
+		}
+	}
+}