mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 10:51:29 +00:00
Merge pull request #115038 from mercedes-benz/tobiasgiese/kubeadmfix-etcd-learner-join
kubeadm: fix etcd learner join
This commit is contained in:
commit
561a35f358
@ -150,7 +150,7 @@ func CreateStackedEtcdStaticPodManifestFile(client clientset.Interface, manifest
|
||||
} else {
|
||||
klog.V(1).Infof("[etcd] Adding etcd member: %s", etcdPeerAddress)
|
||||
if features.Enabled(cfg.FeatureGates, features.EtcdLearnerMode) {
|
||||
cluster, err = etcdClient.AddMemberAsLeanerAndPromote(nodeName, etcdPeerAddress)
|
||||
cluster, err = etcdClient.AddMemberAsLearner(nodeName, etcdPeerAddress)
|
||||
} else {
|
||||
cluster, err = etcdClient.AddMember(nodeName, etcdPeerAddress)
|
||||
}
|
||||
@ -172,6 +172,17 @@ func CreateStackedEtcdStaticPodManifestFile(client clientset.Interface, manifest
|
||||
return nil
|
||||
}
|
||||
|
||||
if features.Enabled(cfg.FeatureGates, features.EtcdLearnerMode) {
|
||||
learnerID, err := etcdClient.GetMemberID(etcdPeerAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = etcdClient.MemberPromote(learnerID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("[etcd] Waiting for the new etcd member to join the cluster. This can take up to %v\n", etcdHealthyCheckInterval*etcdHealthyCheckRetries)
|
||||
if _, err := etcdClient.WaitForClusterAvailable(etcdHealthyCheckRetries, etcdHealthyCheckInterval); err != nil {
|
||||
return err
|
||||
|
@ -251,7 +251,7 @@ func (c fakeTLSEtcdClient) ListMembers() ([]etcdutil.Member, error) {
|
||||
return []etcdutil.Member{}, nil
|
||||
}
|
||||
|
||||
func (c fakeTLSEtcdClient) AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]etcdutil.Member, error) {
|
||||
func (c fakeTLSEtcdClient) AddMemberAsLearner(name string, peerAddrs string) ([]etcdutil.Member, error) {
|
||||
return []etcdutil.Member{}, nil
|
||||
}
|
||||
|
||||
@ -259,6 +259,10 @@ func (c fakeTLSEtcdClient) AddMember(name string, peerAddrs string) ([]etcdutil.
|
||||
return []etcdutil.Member{}, nil
|
||||
}
|
||||
|
||||
func (c fakeTLSEtcdClient) MemberPromote(learnerID uint64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c fakeTLSEtcdClient) GetMemberID(peerURL string) (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
@ -290,7 +294,7 @@ func (c fakePodManifestEtcdClient) ListMembers() ([]etcdutil.Member, error) {
|
||||
return []etcdutil.Member{}, nil
|
||||
}
|
||||
|
||||
func (c fakePodManifestEtcdClient) AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]etcdutil.Member, error) {
|
||||
func (c fakePodManifestEtcdClient) AddMemberAsLearner(name string, peerAddrs string) ([]etcdutil.Member, error) {
|
||||
return []etcdutil.Member{}, nil
|
||||
}
|
||||
|
||||
@ -298,6 +302,10 @@ func (c fakePodManifestEtcdClient) AddMember(name string, peerAddrs string) ([]e
|
||||
return []etcdutil.Member{}, nil
|
||||
}
|
||||
|
||||
func (c fakePodManifestEtcdClient) MemberPromote(learnerID uint64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c fakePodManifestEtcdClient) GetMemberID(peerURL string) (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
@ -60,7 +60,8 @@ type ClusterInterrogator interface {
|
||||
Sync() error
|
||||
ListMembers() ([]Member, error)
|
||||
AddMember(name string, peerAddrs string) ([]Member, error)
|
||||
AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]Member, error)
|
||||
AddMemberAsLearner(name string, peerAddrs string) ([]Member, error)
|
||||
MemberPromote(learnerID uint64) error
|
||||
GetMemberID(peerURL string) (uint64, error)
|
||||
RemoveMember(id uint64) ([]Member, error)
|
||||
}
|
||||
@ -347,8 +348,8 @@ func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
|
||||
return c.addMember(name, peerAddrs, false)
|
||||
}
|
||||
|
||||
// AddMemberAsLeanerAndPromote adds a new learner member into the etcd cluster and promotes it to a voting member
|
||||
func (c *Client) AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]Member, error) {
|
||||
// AddMemberAsLearner adds a new learner member into the etcd cluster.
|
||||
func (c *Client) AddMemberAsLearner(name string, peerAddrs string) ([]Member, error) {
|
||||
return c.addMember(name, peerAddrs, true)
|
||||
}
|
||||
|
||||
@ -364,13 +365,6 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
||||
return nil, errors.Wrapf(err, "error parsing peer address %s", peerAddrs)
|
||||
}
|
||||
|
||||
// Adds a new member to the cluster
|
||||
var (
|
||||
lastError error
|
||||
respMembers []*etcdserverpb.Member
|
||||
learnerID uint64
|
||||
)
|
||||
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
||||
cli, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: c.Endpoints,
|
||||
DialTimeout: etcdTimeout,
|
||||
@ -380,14 +374,20 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
||||
TLS: c.TLS,
|
||||
})
|
||||
if err != nil {
|
||||
lastError = err
|
||||
return false, nil
|
||||
return nil, err
|
||||
}
|
||||
defer cli.Close()
|
||||
|
||||
// Adds a new member to the cluster
|
||||
var (
|
||||
lastError error
|
||||
respMembers []*etcdserverpb.Member
|
||||
learnerID uint64
|
||||
resp *clientv3.MemberAddResponse
|
||||
)
|
||||
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||
defer cancel()
|
||||
var resp *clientv3.MemberAddResponse
|
||||
if isLearner {
|
||||
// if learnerID is set, it means the etcd member is already added successfully.
|
||||
if learnerID == 0 {
|
||||
@ -399,11 +399,6 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
||||
}
|
||||
learnerID = resp.Member.ID
|
||||
}
|
||||
err = memberPromote(ctx, cli, learnerID)
|
||||
if err != nil {
|
||||
lastError = err
|
||||
return false, nil
|
||||
}
|
||||
respMembers = resp.Members
|
||||
return true, nil
|
||||
}
|
||||
@ -459,21 +454,73 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func memberPromote(ctx context.Context, cli *clientv3.Client, learnerID uint64) error {
|
||||
// isLearner returns true if the given member ID is a learner.
|
||||
func (c *Client) isLearner(memberID uint64) (bool, error) {
|
||||
resp, err := c.listMembers()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, member := range resp.Members {
|
||||
if member.ID == memberID && member.IsLearner {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// MemberPromote promotes a member as a voting member. If the given member ID is already a voting member this method
|
||||
// will return early and do nothing.
|
||||
func (c *Client) MemberPromote(learnerID uint64) error {
|
||||
isLearner, err := c.isLearner(learnerID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !isLearner {
|
||||
klog.V(1).Infof("[etcd] Member %016x already promoted.", learnerID)
|
||||
return nil
|
||||
}
|
||||
|
||||
klog.V(1).Infof("[etcd] Promoting a learner as a voting member: %016x", learnerID)
|
||||
cli, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: c.Endpoints,
|
||||
DialTimeout: etcdTimeout,
|
||||
DialOptions: []grpc.DialOption{
|
||||
grpc.WithBlock(), // block until the underlying connection is up
|
||||
},
|
||||
TLS: c.TLS,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer cli.Close()
|
||||
|
||||
// TODO: warning logs from etcd client should be removed.
|
||||
// The warning logs are printed by etcd client code for several reasons, including
|
||||
// 1. can not promote yet(no synced)
|
||||
// 2. context deadline exceeded
|
||||
// 3. peer URLs already exists
|
||||
// Once the client provides a way to check if the etcd learner is ready to promote, the retry logic can be revisited.
|
||||
_, err := cli.MemberPromote(ctx, learnerID)
|
||||
var (
|
||||
lastError error
|
||||
)
|
||||
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
|
||||
defer cancel()
|
||||
|
||||
_, err = cli.MemberPromote(ctx, learnerID)
|
||||
if err == nil {
|
||||
klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %016x", learnerID)
|
||||
return nil
|
||||
return true, nil
|
||||
}
|
||||
klog.V(5).Infof("[etcd] Promoting the learner %016x failed: %v", learnerID, err)
|
||||
return err
|
||||
lastError = err
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
return lastError
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CheckClusterHealth returns nil for status Up or error for status Down
|
||||
|
Loading…
Reference in New Issue
Block a user