Merge pull request #90645 from neolit123/1.19-fix-retry-etcd-member-add

kubeadm: fix flakes when performing etcd MemberAdd on slower setups
This commit is contained in:
Kubernetes Prow Robot 2020-05-13 06:04:26 -07:00 committed by GitHub
commit 3b024339bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -351,23 +351,32 @@ func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
return nil, errors.Wrapf(err, "error parsing peer address %s", peerAddrs) return nil, errors.Wrapf(err, "error parsing peer address %s", peerAddrs)
} }
cli, err := clientv3.New(clientv3.Config{ // Exponential backoff for the MemberAdd operation (up to ~200 seconds)
Endpoints: c.Endpoints, etcdBackoffAdd := wait.Backoff{
DialTimeout: dialTimeout, Steps: 18,
DialOptions: []grpc.DialOption{ Duration: 100 * time.Millisecond,
grpc.WithBlock(), // block until the underlying connection is up Factor: 1.5,
}, Jitter: 0.1,
TLS: c.TLS,
})
if err != nil {
return nil, err
} }
defer cli.Close()
// Adds a new member to the cluster // Adds a new member to the cluster
var lastError error var lastError error
var resp *clientv3.MemberAddResponse var resp *clientv3.MemberAddResponse
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) { err = wait.ExponentialBackoff(etcdBackoffAdd, func() (bool, error) {
cli, err := clientv3.New(clientv3.Config{
Endpoints: c.Endpoints,
DialTimeout: etcdTimeout,
DialOptions: []grpc.DialOption{
grpc.WithBlock(), // block until the underlying connection is up
},
TLS: c.TLS,
})
if err != nil {
lastError = err
return false, nil
}
defer cli.Close()
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout) ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
resp, err = cli.MemberAdd(ctx, []string{peerAddrs}) resp, err = cli.MemberAdd(ctx, []string{peerAddrs})
cancel() cancel()