mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-06 02:34:03 +00:00
kubeadm: switch from ExponentialBackoff() to PollUntilContextTimeout()
Switch to PollUntilContextTimeout() everywhere to allow usage of the exposed timeouts in the kubeadm API. Exponential backoff options are more difficult to expose in this regard and a bit too detailed for the common user - i.e. have "steps", "factor" and so on.
This commit is contained in:
parent
caf5311413
commit
5f876b9d0a
@ -28,7 +28,6 @@ import (
|
|||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/version"
|
"k8s.io/apimachinery/pkg/util/version"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
|
||||||
apimachineryversion "k8s.io/apimachinery/pkg/version"
|
apimachineryversion "k8s.io/apimachinery/pkg/version"
|
||||||
componentversion "k8s.io/component-base/version"
|
componentversion "k8s.io/component-base/version"
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
@ -495,15 +494,6 @@ var (
|
|||||||
// the bootstrap tokens to access the kubeadm-certs Secret during the join of a new control-plane
|
// the bootstrap tokens to access the kubeadm-certs Secret during the join of a new control-plane
|
||||||
KubeadmCertsClusterRoleName = fmt.Sprintf("kubeadm:%s", KubeadmCertsSecret)
|
KubeadmCertsClusterRoleName = fmt.Sprintf("kubeadm:%s", KubeadmCertsSecret)
|
||||||
|
|
||||||
// StaticPodMirroringDefaultRetry is used a backoff strategy for
|
|
||||||
// waiting for static pods to be mirrored to the apiserver.
|
|
||||||
StaticPodMirroringDefaultRetry = wait.Backoff{
|
|
||||||
Steps: 30,
|
|
||||||
Duration: 1 * time.Second,
|
|
||||||
Factor: 1.0,
|
|
||||||
Jitter: 0.1,
|
|
||||||
}
|
|
||||||
|
|
||||||
// defaultKubernetesPlaceholderVersion is a placeholder version in case the component-base
|
// defaultKubernetesPlaceholderVersion is a placeholder version in case the component-base
|
||||||
// version was not populated during build.
|
// version was not populated during build.
|
||||||
defaultKubernetesPlaceholderVersion = version.MustParseSemantic("v1.0.0-placeholder-version")
|
defaultKubernetesPlaceholderVersion = version.MustParseSemantic("v1.0.0-placeholder-version")
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
|
||||||
@ -195,29 +196,33 @@ func getNodeNameFromKubeletConfig(fileName string) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getAPIEndpoint(client clientset.Interface, nodeName string, apiEndpoint *kubeadmapi.APIEndpoint) error {
|
func getAPIEndpoint(client clientset.Interface, nodeName string, apiEndpoint *kubeadmapi.APIEndpoint) error {
|
||||||
return getAPIEndpointWithBackoff(client, nodeName, apiEndpoint, constants.StaticPodMirroringDefaultRetry)
|
return getAPIEndpointWithRetry(client, nodeName, apiEndpoint,
|
||||||
|
constants.StaticPodMirroringRetryInterval, constants.StaticPodMirroringTimeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getAPIEndpointWithBackoff(client clientset.Interface, nodeName string, apiEndpoint *kubeadmapi.APIEndpoint, backoff wait.Backoff) error {
|
func getAPIEndpointWithRetry(client clientset.Interface, nodeName string, apiEndpoint *kubeadmapi.APIEndpoint,
|
||||||
|
interval, timeout time.Duration) error {
|
||||||
var err error
|
var err error
|
||||||
var errs []error
|
var errs []error
|
||||||
|
|
||||||
if err = getAPIEndpointFromPodAnnotation(client, nodeName, apiEndpoint, backoff); err == nil {
|
if err = getAPIEndpointFromPodAnnotation(client, nodeName, apiEndpoint, interval, timeout); err == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
errs = append(errs, errors.WithMessagef(err, "could not retrieve API endpoints for node %q using pod annotations", nodeName))
|
errs = append(errs, errors.WithMessagef(err, "could not retrieve API endpoints for node %q using pod annotations", nodeName))
|
||||||
return errorsutil.NewAggregate(errs)
|
return errorsutil.NewAggregate(errs)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getAPIEndpointFromPodAnnotation(client clientset.Interface, nodeName string, apiEndpoint *kubeadmapi.APIEndpoint, backoff wait.Backoff) error {
|
func getAPIEndpointFromPodAnnotation(client clientset.Interface, nodeName string, apiEndpoint *kubeadmapi.APIEndpoint,
|
||||||
|
interval, timeout time.Duration) error {
|
||||||
var rawAPIEndpoint string
|
var rawAPIEndpoint string
|
||||||
var lastErr error
|
var lastErr error
|
||||||
// Let's tolerate some unexpected transient failures from the API server or load balancers. Also, if
|
// Let's tolerate some unexpected transient failures from the API server or load balancers. Also, if
|
||||||
// static pods were not yet mirrored into the API server we want to wait for this propagation.
|
// static pods were not yet mirrored into the API server we want to wait for this propagation.
|
||||||
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(context.Background(), interval, timeout, true,
|
||||||
rawAPIEndpoint, lastErr = getRawAPIEndpointFromPodAnnotationWithoutRetry(client, nodeName)
|
func(ctx context.Context) (bool, error) {
|
||||||
return lastErr == nil, nil
|
rawAPIEndpoint, lastErr = getRawAPIEndpointFromPodAnnotationWithoutRetry(ctx, client, nodeName)
|
||||||
})
|
return lastErr == nil, nil
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -229,9 +234,9 @@ func getAPIEndpointFromPodAnnotation(client clientset.Interface, nodeName string
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRawAPIEndpointFromPodAnnotationWithoutRetry(client clientset.Interface, nodeName string) (string, error) {
|
func getRawAPIEndpointFromPodAnnotationWithoutRetry(ctx context.Context, client clientset.Interface, nodeName string) (string, error) {
|
||||||
podList, err := client.CoreV1().Pods(metav1.NamespaceSystem).List(
|
podList, err := client.CoreV1().Pods(metav1.NamespaceSystem).List(
|
||||||
context.TODO(),
|
ctx,
|
||||||
metav1.ListOptions{
|
metav1.ListOptions{
|
||||||
FieldSelector: fmt.Sprintf("spec.nodeName=%s", nodeName),
|
FieldSelector: fmt.Sprintf("spec.nodeName=%s", nodeName),
|
||||||
LabelSelector: fmt.Sprintf("component=%s,tier=%s", constants.KubeAPIServer, constants.ControlPlaneTier),
|
LabelSelector: fmt.Sprintf("component=%s,tier=%s", constants.KubeAPIServer, constants.ControlPlaneTier),
|
||||||
|
@ -25,6 +25,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
|
||||||
@ -32,7 +33,6 @@ import (
|
|||||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
|
||||||
clientsetfake "k8s.io/client-go/kubernetes/fake"
|
clientsetfake "k8s.io/client-go/kubernetes/fake"
|
||||||
clienttesting "k8s.io/client-go/testing"
|
clienttesting "k8s.io/client-go/testing"
|
||||||
|
|
||||||
@ -461,7 +461,8 @@ func TestGetAPIEndpointWithBackoff(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
apiEndpoint := kubeadmapi.APIEndpoint{}
|
apiEndpoint := kubeadmapi.APIEndpoint{}
|
||||||
err := getAPIEndpointWithBackoff(client, rt.nodeName, &apiEndpoint, wait.Backoff{Duration: 0, Jitter: 0, Steps: 1})
|
err := getAPIEndpointWithRetry(client, rt.nodeName, &apiEndpoint,
|
||||||
|
time.Millisecond*10, time.Millisecond*100)
|
||||||
if err != nil && !rt.expectedErr {
|
if err != nil && !rt.expectedErr {
|
||||||
t.Errorf("got error %q; was expecting no errors", err)
|
t.Errorf("got error %q; was expecting no errors", err)
|
||||||
return
|
return
|
||||||
@ -718,7 +719,8 @@ func TestGetAPIEndpointFromPodAnnotation(t *testing.T) {
|
|||||||
rt.clientSetup(client)
|
rt.clientSetup(client)
|
||||||
}
|
}
|
||||||
apiEndpoint := kubeadmapi.APIEndpoint{}
|
apiEndpoint := kubeadmapi.APIEndpoint{}
|
||||||
err := getAPIEndpointFromPodAnnotation(client, rt.nodeName, &apiEndpoint, wait.Backoff{Duration: 0, Jitter: 0, Steps: 1})
|
err := getAPIEndpointFromPodAnnotation(client, rt.nodeName, &apiEndpoint,
|
||||||
|
time.Millisecond*10, time.Millisecond*100)
|
||||||
if err != nil && !rt.expectedErr {
|
if err != nil && !rt.expectedErr {
|
||||||
t.Errorf("got error %v, but wasn't expecting any error", err)
|
t.Errorf("got error %v, but wasn't expecting any error", err)
|
||||||
return
|
return
|
||||||
@ -832,7 +834,7 @@ func TestGetRawAPIEndpointFromPodAnnotationWithoutRetry(t *testing.T) {
|
|||||||
if rt.clientSetup != nil {
|
if rt.clientSetup != nil {
|
||||||
rt.clientSetup(client)
|
rt.clientSetup(client)
|
||||||
}
|
}
|
||||||
endpoint, err := getRawAPIEndpointFromPodAnnotationWithoutRetry(client, rt.nodeName)
|
endpoint, err := getRawAPIEndpointFromPodAnnotationWithoutRetry(context.Background(), client, rt.nodeName)
|
||||||
if err != nil && !rt.expectedErr {
|
if err != nil && !rt.expectedErr {
|
||||||
t.Errorf("got error %v, but wasn't expecting any error", err)
|
t.Errorf("got error %v, but wasn't expecting any error", err)
|
||||||
return
|
return
|
||||||
|
@ -45,14 +45,6 @@ import (
|
|||||||
|
|
||||||
const etcdTimeout = 2 * time.Second
|
const etcdTimeout = 2 * time.Second
|
||||||
|
|
||||||
// Exponential backoff for etcd operations (up to ~200 seconds)
|
|
||||||
var etcdBackoff = wait.Backoff{
|
|
||||||
Steps: 18,
|
|
||||||
Duration: 100 * time.Millisecond,
|
|
||||||
Factor: 1.5,
|
|
||||||
Jitter: 0.1,
|
|
||||||
}
|
|
||||||
|
|
||||||
// ErrNoMemberIDForPeerURL is returned when it is not possible to obtain a member ID
|
// ErrNoMemberIDForPeerURL is returned when it is not possible to obtain a member ID
|
||||||
// from a given peer URL
|
// from a given peer URL
|
||||||
var ErrNoMemberIDForPeerURL = errors.New("no member id found for peer URL")
|
var ErrNoMemberIDForPeerURL = errors.New("no member id found for peer URL")
|
||||||
@ -105,7 +97,7 @@ type Client struct {
|
|||||||
|
|
||||||
newEtcdClient func(endpoints []string) (etcdClient, error)
|
newEtcdClient func(endpoints []string) (etcdClient, error)
|
||||||
|
|
||||||
listMembersFunc func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error)
|
listMembersFunc func(timeout time.Duration) (*clientv3.MemberListResponse, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new EtcdCluster client
|
// New creates a new EtcdCluster client
|
||||||
@ -178,31 +170,33 @@ func NewFromCluster(client clientset.Interface, certificatesDir string) (*Client
|
|||||||
|
|
||||||
// getEtcdEndpoints returns the list of etcd endpoints.
|
// getEtcdEndpoints returns the list of etcd endpoints.
|
||||||
func getEtcdEndpoints(client clientset.Interface) ([]string, error) {
|
func getEtcdEndpoints(client clientset.Interface) ([]string, error) {
|
||||||
return getEtcdEndpointsWithBackoff(client, constants.StaticPodMirroringDefaultRetry)
|
return getEtcdEndpointsWithRetry(client,
|
||||||
|
constants.StaticPodMirroringRetryInterval, constants.StaticPodMirroringTimeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getEtcdEndpointsWithBackoff(client clientset.Interface, backoff wait.Backoff) ([]string, error) {
|
func getEtcdEndpointsWithRetry(client clientset.Interface, interval, timeout time.Duration) ([]string, error) {
|
||||||
return getRawEtcdEndpointsFromPodAnnotation(client, backoff)
|
return getRawEtcdEndpointsFromPodAnnotation(client, interval, timeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getRawEtcdEndpointsFromPodAnnotation returns the list of endpoints as reported on etcd's pod annotations using the given backoff
|
// getRawEtcdEndpointsFromPodAnnotation returns the list of endpoints as reported on etcd's pod annotations using the given backoff
|
||||||
func getRawEtcdEndpointsFromPodAnnotation(client clientset.Interface, backoff wait.Backoff) ([]string, error) {
|
func getRawEtcdEndpointsFromPodAnnotation(client clientset.Interface, interval, timeout time.Duration) ([]string, error) {
|
||||||
etcdEndpoints := []string{}
|
etcdEndpoints := []string{}
|
||||||
var lastErr error
|
var lastErr error
|
||||||
// Let's tolerate some unexpected transient failures from the API server or load balancers. Also, if
|
// Let's tolerate some unexpected transient failures from the API server or load balancers. Also, if
|
||||||
// static pods were not yet mirrored into the API server we want to wait for this propagation.
|
// static pods were not yet mirrored into the API server we want to wait for this propagation.
|
||||||
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(context.Background(), interval, timeout, true,
|
||||||
var overallEtcdPodCount int
|
func(_ context.Context) (bool, error) {
|
||||||
if etcdEndpoints, overallEtcdPodCount, lastErr = getRawEtcdEndpointsFromPodAnnotationWithoutRetry(client); lastErr != nil {
|
var overallEtcdPodCount int
|
||||||
return false, nil
|
if etcdEndpoints, overallEtcdPodCount, lastErr = getRawEtcdEndpointsFromPodAnnotationWithoutRetry(client); lastErr != nil {
|
||||||
}
|
return false, nil
|
||||||
if len(etcdEndpoints) == 0 || overallEtcdPodCount != len(etcdEndpoints) {
|
}
|
||||||
klog.V(4).Infof("found a total of %d etcd pods and the following endpoints: %v; retrying",
|
if len(etcdEndpoints) == 0 || overallEtcdPodCount != len(etcdEndpoints) {
|
||||||
overallEtcdPodCount, etcdEndpoints)
|
klog.V(4).Infof("found a total of %d etcd pods and the following endpoints: %v; retrying",
|
||||||
return false, nil
|
overallEtcdPodCount, etcdEndpoints)
|
||||||
}
|
return false, nil
|
||||||
return true, nil
|
}
|
||||||
})
|
return true, nil
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
const message = "could not retrieve the list of etcd endpoints"
|
const message = "could not retrieve the list of etcd endpoints"
|
||||||
if lastErr != nil {
|
if lastErr != nil {
|
||||||
@ -244,24 +238,25 @@ func (c *Client) Sync() error {
|
|||||||
// Syncs the list of endpoints
|
// Syncs the list of endpoints
|
||||||
var cli etcdClient
|
var cli etcdClient
|
||||||
var lastError error
|
var lastError error
|
||||||
err := wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, constants.EtcdAPICallTimeout,
|
||||||
var err error
|
true, func(_ context.Context) (bool, error) {
|
||||||
cli, err = c.newEtcdClient(c.Endpoints)
|
var err error
|
||||||
if err != nil {
|
cli, err = c.newEtcdClient(c.Endpoints)
|
||||||
|
if err != nil {
|
||||||
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
defer func() { _ = cli.Close() }()
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||||
|
err = cli.Sync(ctx)
|
||||||
|
cancel()
|
||||||
|
if err == nil {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
klog.V(5).Infof("Failed to sync etcd endpoints: %v", err)
|
||||||
lastError = err
|
lastError = err
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
})
|
||||||
defer cli.Close()
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
|
||||||
err = cli.Sync(ctx)
|
|
||||||
cancel()
|
|
||||||
if err == nil {
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
klog.V(5).Infof("Failed to sync etcd endpoints: %v", err)
|
|
||||||
lastError = err
|
|
||||||
return false, nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return lastError
|
return lastError
|
||||||
}
|
}
|
||||||
@ -278,31 +273,32 @@ type Member struct {
|
|||||||
PeerURL string
|
PeerURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) listMembers(backoff *wait.Backoff) (*clientv3.MemberListResponse, error) {
|
func (c *Client) listMembers(timeout time.Duration) (*clientv3.MemberListResponse, error) {
|
||||||
// Gets the member list
|
// Gets the member list
|
||||||
var lastError error
|
var lastError error
|
||||||
var resp *clientv3.MemberListResponse
|
var resp *clientv3.MemberListResponse
|
||||||
if backoff == nil {
|
if timeout == 0 {
|
||||||
backoff = &etcdBackoff
|
timeout = constants.EtcdAPICallTimeout
|
||||||
}
|
}
|
||||||
err := wait.ExponentialBackoff(*backoff, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, timeout,
|
||||||
cli, err := c.newEtcdClient(c.Endpoints)
|
true, func(_ context.Context) (bool, error) {
|
||||||
if err != nil {
|
cli, err := c.newEtcdClient(c.Endpoints)
|
||||||
|
if err != nil {
|
||||||
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
defer func() { _ = cli.Close() }()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||||
|
resp, err = cli.MemberList(ctx)
|
||||||
|
cancel()
|
||||||
|
if err == nil {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
klog.V(5).Infof("Failed to get etcd member list: %v", err)
|
||||||
lastError = err
|
lastError = err
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
})
|
||||||
defer cli.Close()
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
|
||||||
resp, err = cli.MemberList(ctx)
|
|
||||||
cancel()
|
|
||||||
if err == nil {
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
klog.V(5).Infof("Failed to get etcd member list: %v", err)
|
|
||||||
lastError = err
|
|
||||||
return false, nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, lastError
|
return nil, lastError
|
||||||
}
|
}
|
||||||
@ -311,7 +307,7 @@ func (c *Client) listMembers(backoff *wait.Backoff) (*clientv3.MemberListRespons
|
|||||||
|
|
||||||
// GetMemberID returns the member ID of the given peer URL
|
// GetMemberID returns the member ID of the given peer URL
|
||||||
func (c *Client) GetMemberID(peerURL string) (uint64, error) {
|
func (c *Client) GetMemberID(peerURL string) (uint64, error) {
|
||||||
resp, err := c.listMembersFunc(nil)
|
resp, err := c.listMembersFunc(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
@ -326,7 +322,7 @@ func (c *Client) GetMemberID(peerURL string) (uint64, error) {
|
|||||||
|
|
||||||
// ListMembers returns the member list.
|
// ListMembers returns the member list.
|
||||||
func (c *Client) ListMembers() ([]Member, error) {
|
func (c *Client) ListMembers() ([]Member, error) {
|
||||||
resp, err := c.listMembersFunc(nil)
|
resp, err := c.listMembersFunc(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -343,28 +339,29 @@ func (c *Client) RemoveMember(id uint64) ([]Member, error) {
|
|||||||
// Remove an existing member from the cluster
|
// Remove an existing member from the cluster
|
||||||
var lastError error
|
var lastError error
|
||||||
var resp *clientv3.MemberRemoveResponse
|
var resp *clientv3.MemberRemoveResponse
|
||||||
err := wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, constants.EtcdAPICallTimeout,
|
||||||
cli, err := c.newEtcdClient(c.Endpoints)
|
true, func(_ context.Context) (bool, error) {
|
||||||
if err != nil {
|
cli, err := c.newEtcdClient(c.Endpoints)
|
||||||
|
if err != nil {
|
||||||
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
defer func() { _ = cli.Close() }()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||||
|
resp, err = cli.MemberRemove(ctx, id)
|
||||||
|
cancel()
|
||||||
|
if err == nil {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if errors.Is(rpctypes.ErrMemberNotFound, err) {
|
||||||
|
klog.V(5).Infof("Member was already removed, because member %s was not found", strconv.FormatUint(id, 16))
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
klog.V(5).Infof("Failed to remove etcd member: %v", err)
|
||||||
lastError = err
|
lastError = err
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
})
|
||||||
defer cli.Close()
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
|
||||||
resp, err = cli.MemberRemove(ctx, id)
|
|
||||||
cancel()
|
|
||||||
if err == nil {
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
if errors.Is(rpctypes.ErrMemberNotFound, err) {
|
|
||||||
klog.V(5).Infof("Member was already removed, because member %s was not found", strconv.FormatUint(id, 16))
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
klog.V(5).Infof("Failed to remove etcd member: %v", err)
|
|
||||||
lastError = err
|
|
||||||
return false, nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, lastError
|
return nil, lastError
|
||||||
}
|
}
|
||||||
@ -407,7 +404,7 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer cli.Close()
|
defer func() { _ = cli.Close() }()
|
||||||
|
|
||||||
// Adds a new member to the cluster
|
// Adds a new member to the cluster
|
||||||
var (
|
var (
|
||||||
@ -416,46 +413,47 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
|||||||
learnerID uint64
|
learnerID uint64
|
||||||
resp *clientv3.MemberAddResponse
|
resp *clientv3.MemberAddResponse
|
||||||
)
|
)
|
||||||
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
err = wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, constants.EtcdAPICallTimeout,
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
true, func(_ context.Context) (bool, error) {
|
||||||
defer cancel()
|
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||||
if isLearner {
|
defer cancel()
|
||||||
// if learnerID is set, it means the etcd member is already added successfully.
|
if isLearner {
|
||||||
if learnerID == 0 {
|
// if learnerID is set, it means the etcd member is already added successfully.
|
||||||
klog.V(1).Info("[etcd] Adding etcd member as learner")
|
if learnerID == 0 {
|
||||||
resp, err = cli.MemberAddAsLearner(ctx, []string{peerAddrs})
|
klog.V(1).Info("[etcd] Adding etcd member as learner")
|
||||||
if err != nil {
|
resp, err = cli.MemberAddAsLearner(ctx, []string{peerAddrs})
|
||||||
lastError = err
|
if err != nil {
|
||||||
return false, nil
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
learnerID = resp.Member.ID
|
||||||
}
|
}
|
||||||
learnerID = resp.Member.ID
|
respMembers = resp.Members
|
||||||
}
|
|
||||||
respMembers = resp.Members
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
resp, err = cli.MemberAdd(ctx, []string{peerAddrs})
|
|
||||||
if err == nil {
|
|
||||||
respMembers = resp.Members
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the error indicates that the peer already exists, exit early. In this situation, resp is nil, so
|
|
||||||
// call out to MemberList to fetch all the members before returning.
|
|
||||||
if errors.Is(err, rpctypes.ErrPeerURLExist) {
|
|
||||||
klog.V(5).Info("The peer URL for the added etcd member already exists. Fetching the existing etcd members")
|
|
||||||
var listResp *clientv3.MemberListResponse
|
|
||||||
listResp, err = cli.MemberList(ctx)
|
|
||||||
if err == nil {
|
|
||||||
respMembers = listResp.Members
|
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
klog.V(5).Infof("Failed to add etcd member: %v", err)
|
resp, err = cli.MemberAdd(ctx, []string{peerAddrs})
|
||||||
lastError = err
|
if err == nil {
|
||||||
return false, nil
|
respMembers = resp.Members
|
||||||
})
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the error indicates that the peer already exists, exit early. In this situation, resp is nil, so
|
||||||
|
// call out to MemberList to fetch all the members before returning.
|
||||||
|
if errors.Is(err, rpctypes.ErrPeerURLExist) {
|
||||||
|
klog.V(5).Info("The peer URL for the added etcd member already exists. Fetching the existing etcd members")
|
||||||
|
var listResp *clientv3.MemberListResponse
|
||||||
|
listResp, err = cli.MemberList(ctx)
|
||||||
|
if err == nil {
|
||||||
|
respMembers = listResp.Members
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
klog.V(5).Infof("Failed to add etcd member: %v", err)
|
||||||
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, lastError
|
return nil, lastError
|
||||||
}
|
}
|
||||||
@ -487,7 +485,7 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
|||||||
|
|
||||||
// isLearner returns true if the given member ID is a learner.
|
// isLearner returns true if the given member ID is a learner.
|
||||||
func (c *Client) isLearner(memberID uint64) (bool, error) {
|
func (c *Client) isLearner(memberID uint64) (bool, error) {
|
||||||
resp, err := c.listMembersFunc(nil)
|
resp, err := c.listMembersFunc(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
@ -517,7 +515,7 @@ func (c *Client) MemberPromote(learnerID uint64) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer cli.Close()
|
defer func() { _ = cli.Close() }()
|
||||||
|
|
||||||
// TODO: warning logs from etcd client should be removed.
|
// TODO: warning logs from etcd client should be removed.
|
||||||
// The warning logs are printed by etcd client code for several reasons, including
|
// The warning logs are printed by etcd client code for several reasons, including
|
||||||
@ -528,19 +526,20 @@ func (c *Client) MemberPromote(learnerID uint64) error {
|
|||||||
var (
|
var (
|
||||||
lastError error
|
lastError error
|
||||||
)
|
)
|
||||||
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
err = wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, constants.EtcdAPICallTimeout,
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
true, func(_ context.Context) (bool, error) {
|
||||||
defer cancel()
|
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
_, err = cli.MemberPromote(ctx, learnerID)
|
_, err = cli.MemberPromote(ctx, learnerID)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %s", strconv.FormatUint(learnerID, 16))
|
klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %s", strconv.FormatUint(learnerID, 16))
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
klog.V(5).Infof("[etcd] Promoting the learner %s failed: %v", strconv.FormatUint(learnerID, 16), err)
|
klog.V(5).Infof("[etcd] Promoting the learner %s failed: %v", strconv.FormatUint(learnerID, 16), err)
|
||||||
lastError = err
|
lastError = err
|
||||||
return false, nil
|
return false, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return lastError
|
return lastError
|
||||||
}
|
}
|
||||||
@ -560,24 +559,25 @@ func (c *Client) getClusterStatus() (map[string]*clientv3.StatusResponse, error)
|
|||||||
// Gets the member status
|
// Gets the member status
|
||||||
var lastError error
|
var lastError error
|
||||||
var resp *clientv3.StatusResponse
|
var resp *clientv3.StatusResponse
|
||||||
err := wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, constants.EtcdAPICallTimeout,
|
||||||
cli, err := c.newEtcdClient(c.Endpoints)
|
true, func(_ context.Context) (bool, error) {
|
||||||
if err != nil {
|
cli, err := c.newEtcdClient(c.Endpoints)
|
||||||
|
if err != nil {
|
||||||
|
lastError = err
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
defer func() { _ = cli.Close() }()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||||
|
resp, err = cli.Status(ctx, ep)
|
||||||
|
cancel()
|
||||||
|
if err == nil {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
klog.V(5).Infof("Failed to get etcd status for %s: %v", ep, err)
|
||||||
lastError = err
|
lastError = err
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
})
|
||||||
defer cli.Close()
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
|
||||||
resp, err = cli.Status(ctx, ep)
|
|
||||||
cancel()
|
|
||||||
if err == nil {
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
klog.V(5).Infof("Failed to get etcd status for %s: %v", ep, err)
|
|
||||||
lastError = err
|
|
||||||
return false, nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, lastError
|
return nil, lastError
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
"reflect"
|
"reflect"
|
||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
|
||||||
@ -29,7 +30,6 @@ import (
|
|||||||
clientv3 "go.etcd.io/etcd/client/v3"
|
clientv3 "go.etcd.io/etcd/client/v3"
|
||||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
|
||||||
clientsetfake "k8s.io/client-go/kubernetes/fake"
|
clientsetfake "k8s.io/client-go/kubernetes/fake"
|
||||||
clienttesting "k8s.io/client-go/testing"
|
clienttesting "k8s.io/client-go/testing"
|
||||||
|
|
||||||
@ -208,7 +208,7 @@ func TestGetEtcdEndpointsWithBackoff(t *testing.T) {
|
|||||||
t.Errorf("error setting up test creating pod for node %q", pod.NodeName)
|
t.Errorf("error setting up test creating pod for node %q", pod.NodeName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
endpoints, err := getEtcdEndpointsWithBackoff(client, wait.Backoff{Duration: 0, Jitter: 0, Steps: 1})
|
endpoints, err := getEtcdEndpointsWithRetry(client, time.Microsecond*10, time.Millisecond*100)
|
||||||
if err != nil && !rt.expectedErr {
|
if err != nil && !rt.expectedErr {
|
||||||
t.Errorf("got error %q; was expecting no errors", err)
|
t.Errorf("got error %q; was expecting no errors", err)
|
||||||
return
|
return
|
||||||
@ -293,7 +293,7 @@ func TestGetRawEtcdEndpointsFromPodAnnotation(t *testing.T) {
|
|||||||
if rt.clientSetup != nil {
|
if rt.clientSetup != nil {
|
||||||
rt.clientSetup(client)
|
rt.clientSetup(client)
|
||||||
}
|
}
|
||||||
endpoints, err := getRawEtcdEndpointsFromPodAnnotation(client, wait.Backoff{Duration: 0, Jitter: 0, Steps: 1})
|
endpoints, err := getRawEtcdEndpointsFromPodAnnotation(client, time.Microsecond*10, time.Millisecond*100)
|
||||||
if err != nil && !rt.expectedErr {
|
if err != nil && !rt.expectedErr {
|
||||||
t.Errorf("got error %v, but wasn't expecting any error", err)
|
t.Errorf("got error %v, but wasn't expecting any error", err)
|
||||||
return
|
return
|
||||||
@ -482,9 +482,9 @@ func TestClient_GetMemberID(t *testing.T) {
|
|||||||
Endpoints: tt.fields.Endpoints,
|
Endpoints: tt.fields.Endpoints,
|
||||||
newEtcdClient: tt.fields.newEtcdClient,
|
newEtcdClient: tt.fields.newEtcdClient,
|
||||||
}
|
}
|
||||||
c.listMembersFunc = func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error) {
|
c.listMembersFunc = func(_ time.Duration) (*clientv3.MemberListResponse, error) {
|
||||||
f, _ := c.newEtcdClient([]string{})
|
f, _ := c.newEtcdClient([]string{})
|
||||||
resp, _ := f.MemberList(context.TODO())
|
resp, _ := f.MemberList(context.Background())
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -504,7 +504,7 @@ func TestListMembers(t *testing.T) {
|
|||||||
type fields struct {
|
type fields struct {
|
||||||
Endpoints []string
|
Endpoints []string
|
||||||
newEtcdClient func(endpoints []string) (etcdClient, error)
|
newEtcdClient func(endpoints []string) (etcdClient, error)
|
||||||
listMembersFunc func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error)
|
listMembersFunc func(timeout time.Duration) (*clientv3.MemberListResponse, error)
|
||||||
}
|
}
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@ -606,7 +606,7 @@ func TestListMembers(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return f, nil
|
return f, nil
|
||||||
},
|
},
|
||||||
listMembersFunc: func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error) {
|
listMembersFunc: func(_ time.Duration) (*clientv3.MemberListResponse, error) {
|
||||||
return nil, errNotImplemented
|
return nil, errNotImplemented
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -622,8 +622,8 @@ func TestListMembers(t *testing.T) {
|
|||||||
listMembersFunc: tt.fields.listMembersFunc,
|
listMembersFunc: tt.fields.listMembersFunc,
|
||||||
}
|
}
|
||||||
if c.listMembersFunc == nil {
|
if c.listMembersFunc == nil {
|
||||||
c.listMembersFunc = func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error) {
|
c.listMembersFunc = func(_ time.Duration) (*clientv3.MemberListResponse, error) {
|
||||||
return c.listMembers(&wait.Backoff{Steps: 1})
|
return c.listMembers(100 * time.Millisecond)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
got, err := c.ListMembers()
|
got, err := c.ListMembers()
|
||||||
@ -641,7 +641,7 @@ func TestIsLearner(t *testing.T) {
|
|||||||
type fields struct {
|
type fields struct {
|
||||||
Endpoints []string
|
Endpoints []string
|
||||||
newEtcdClient func(endpoints []string) (etcdClient, error)
|
newEtcdClient func(endpoints []string) (etcdClient, error)
|
||||||
listMembersFunc func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error)
|
listMembersFunc func(timeout time.Duration) (*clientv3.MemberListResponse, error)
|
||||||
}
|
}
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@ -756,7 +756,7 @@ func TestIsLearner(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return f, nil
|
return f, nil
|
||||||
},
|
},
|
||||||
listMembersFunc: func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error) {
|
listMembersFunc: func(_ time.Duration) (*clientv3.MemberListResponse, error) {
|
||||||
return nil, errNotImplemented
|
return nil, errNotImplemented
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -772,9 +772,9 @@ func TestIsLearner(t *testing.T) {
|
|||||||
listMembersFunc: tt.fields.listMembersFunc,
|
listMembersFunc: tt.fields.listMembersFunc,
|
||||||
}
|
}
|
||||||
if c.listMembersFunc == nil {
|
if c.listMembersFunc == nil {
|
||||||
c.listMembersFunc = func(backoff *wait.Backoff) (*clientv3.MemberListResponse, error) {
|
c.listMembersFunc = func(t_ time.Duration) (*clientv3.MemberListResponse, error) {
|
||||||
f, _ := c.newEtcdClient([]string{})
|
f, _ := c.newEtcdClient([]string{})
|
||||||
resp, _ := f.MemberList(context.TODO())
|
resp, _ := f.MemberList(context.Background())
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user