From 70c9f770d7aa2194bfd3f58fe01756a7d200b866 Mon Sep 17 00:00:00 2001 From: Michael Gasch Date: Tue, 31 Mar 2020 12:01:53 +0200 Subject: [PATCH] Add etcd WithRequireLeader option to API watches Watches against etcd in the API server can hang forever if the etcd cluster loses quorum, e.g. the majority of nodes crash. This fix improves responsiveness (detection and reaction time) of API server watches against etcd in some rare (but still possible) edge cases so that watches are terminated with `"etcdserver: no leader" (ErrNoLeader)`. Implementation behavior described by jingyih: ``` The etcd server waits until it cannot find a leader for 3 election timeouts to cancel existing streams. 3 is currently a hard coded constant. The election timeout defaults to 1000ms. If the cluster is healthy, when the leader is stopped, the leadership transfer should be smooth. (leader transfers its leadership before stopping). If leader is hard killed, other servers will take an election timeout to realize leader lost and start campaign. ``` For further details, discussion and validation see https://github.com/kubernetes/kubernetes/issues/89488#issuecomment-606491110 and https://github.com/etcd-io/etcd/issues/8980. Closes: https://github.com/kubernetes/kubernetes/issues/89488 Signed-off-by: Michael Gasch --- .../src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go b/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go index ca1583fb379..a9acf6e6bd2 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go @@ -126,7 +126,15 @@ func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, re // The filter doesn't filter out any object. 
wc.internalPred = storage.Everything } - wc.ctx, wc.cancel = context.WithCancel(ctx) + + // The etcd server waits until it cannot find a leader for 3 election + // timeouts to cancel existing streams. 3 is currently a hard coded + // constant. The election timeout defaults to 1000ms. If the cluster is + // healthy, when the leader is stopped, the leadership transfer should be + // smooth. (leader transfers its leadership before stopping). If leader is + // hard killed, other servers will take an election timeout to realize + // leader lost and start campaign. + wc.ctx, wc.cancel = context.WithCancel(clientv3.WithRequireLeader(ctx)) return wc }