From 70c9f770d7aa2194bfd3f58fe01756a7d200b866 Mon Sep 17 00:00:00 2001 From: Michael Gasch Date: Tue, 31 Mar 2020 12:01:53 +0200 Subject: [PATCH] Add etcd WithRequireLeader option to API watches Watches against etcd in the API server can hang forever if the etcd cluster loses quorum, e.g. the majority of nodes crash. This fix improves responsiveness (detection and reaction time) of API server watches against etcd in some rare (but still possible) edge cases so that watches are terminated with `"etcdserver: no leader" (ErrNoLeader)`. Implementation behavior described by jingyih: ``` The etcd server waits until it cannot find a leader for 3 election timeouts to cancel existing streams. 3 is currently a hard coded constant. The election timeout defaults to 1000ms. If the cluster is healthy, when the leader is stopped, the leadership transfer should be smooth. (leader transfers its leadership before stopping). If leader is hard killed, other servers will take an election timeout to realize leader lost and start campaign. ``` For further details, discussion and validation see https://github.com/kubernetes/kubernetes/issues/89488#issuecomment-606491110 and https://github.com/etcd-io/etcd/issues/8980. Closes: https://github.com/kubernetes/kubernetes/issues/89488 Signed-off-by: Michael Gasch --- .../src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go b/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go index ca1583fb379..a9acf6e6bd2 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go @@ -126,7 +126,15 @@ func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, re // The filter doesn't filter out any object. 
wc.internalPred = storage.Everything } - wc.ctx, wc.cancel = context.WithCancel(ctx) + + // The etcd server waits until it cannot find a leader for 3 election + // timeouts to cancel existing streams. 3 is currently a hard coded + // constant. The election timeout defaults to 1000ms. If the cluster is + // healthy, when the leader is stopped, the leadership transfer should be + // smooth. (leader transfers its leadership before stopping). If leader is + // hard killed, other servers will take an election timeout to realize + // leader lost and start campaign. + wc.ctx, wc.cancel = context.WithCancel(clientv3.WithRequireLeader(ctx)) return wc }