1
0
mirror of https://github.com/rancher/rke.git synced 2025-09-01 15:06:23 +00:00

Attempt upgrade on NotReady hosts

This commit is contained in:
rajashree
2020-02-26 13:33:22 -08:00
parent ca3a3b1814
commit e27a05f8b1
10 changed files with 313 additions and 177 deletions

View File

@@ -69,6 +69,7 @@ type Cluster struct {
EncryptionConfig encryptionConfig
NewHosts map[string]bool
MaxUnavailableForWorkerNodes int
MaxUnavailableForControlNodes int
HostsLabeledToIgnoreUpgrade map[string]bool
}
@@ -109,10 +110,10 @@ const (
networkAddon = "network"
)
func (c *Cluster) DeployControlPlane(ctx context.Context, svcOptionData map[string]*v3.KubernetesServicesOptions, reconcileCluster bool) error {
func (c *Cluster) DeployControlPlane(ctx context.Context, svcOptionData map[string]*v3.KubernetesServicesOptions, reconcileCluster bool) (string, error) {
kubeClient, err := k8s.NewClient(c.LocalKubeConfigPath, c.K8sWrapTransport)
if err != nil {
return fmt.Errorf("failed to initialize new kubernetes client: %v", err)
return "", fmt.Errorf("failed to initialize new kubernetes client: %v", err)
}
// Deploy Etcd Plane
@@ -126,15 +127,19 @@ func (c *Cluster) DeployControlPlane(ctx context.Context, svcOptionData map[stri
log.Infof(ctx, "[etcd] External etcd connection string has been specified, skipping etcd plane")
} else {
if err := services.RunEtcdPlane(ctx, c.EtcdHosts, etcdNodePlanMap, c.LocalConnDialerFactory, c.PrivateRegistriesMap, c.UpdateWorkersOnly, c.SystemImages.Alpine, c.Services.Etcd, c.Certificates); err != nil {
return fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
return "", fmt.Errorf("[etcd] Failed to bring up Etcd Plane: %v", err)
}
}
// Deploy Control plane
cpNodePlanMap := make(map[string]v3.RKEConfigNodePlan)
// Build cp node plan map
var notReadyHosts []*hosts.Host
for _, cpHost := range c.ControlPlaneHosts {
cpNodePlanMap[cpHost.Address] = BuildRKEConfigNodePlan(ctx, c, cpHost, cpHost.DockerInfo, svcOptionData)
if err := services.CheckNodeReady(kubeClient, cpHost, services.ControlRole); err != nil {
notReadyHosts = append(notReadyHosts, cpHost)
}
}
if !reconcileCluster {
@@ -145,12 +150,18 @@ func (c *Cluster) DeployControlPlane(ctx context.Context, svcOptionData map[stri
c.UpdateWorkersOnly,
c.SystemImages.Alpine,
c.Certificates); err != nil {
return fmt.Errorf("[controlPlane] Failed to bring up Control Plane: %v", err)
return "", fmt.Errorf("[controlPlane] Failed to bring up Control Plane: %v", err)
}
return nil
return "", nil
}
return c.UpgradeControlPlane(ctx, kubeClient, cpNodePlanMap, notReadyHosts)
}
func (c *Cluster) UpgradeControlPlane(ctx context.Context, kubeClient *kubernetes.Clientset, cpNodePlanMap map[string]v3.RKEConfigNodePlan, notReadyHosts []*hosts.Host) (string, error) {
inactiveHosts := make(map[string]bool)
var controlPlaneHosts []*hosts.Host
var notReadyHostNames []string
for _, host := range c.InactiveHosts {
if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
inactiveHosts[host.HostnameOverride] = true
@@ -161,16 +172,35 @@ func (c *Cluster) DeployControlPlane(ctx context.Context, svcOptionData map[stri
controlPlaneHosts = append(controlPlaneHosts, host)
}
}
if err := services.UpgradeControlPlaneNodes(ctx, kubeClient, controlPlaneHosts,
for _, host := range notReadyHosts {
notReadyHostNames = append(notReadyHostNames, host.HostnameOverride)
}
// attempt upgrade on NotReady hosts without respecting maxUnavailable
logrus.Infof("Attempting upgrade of controlplane components on following hosts in NotReady status: %v", strings.Join(notReadyHostNames, ","))
services.RunControlPlane(ctx, notReadyHosts,
c.LocalConnDialerFactory,
c.PrivateRegistriesMap,
cpNodePlanMap,
c.UpdateWorkersOnly,
c.SystemImages.Alpine,
c.Certificates, c.UpgradeStrategy, c.NewHosts, inactiveHosts); err != nil {
return fmt.Errorf("[controlPlane] Failed to upgrade Control Plane: %v", err)
c.Certificates)
for _, host := range notReadyHosts {
services.CheckNodeReady(kubeClient, host, services.ControlRole)
}
return nil
// rolling upgrade respecting maxUnavailable
errMsgMaxUnavailableNotFailed, err := services.UpgradeControlPlaneNodes(ctx, kubeClient, controlPlaneHosts,
c.LocalConnDialerFactory,
c.PrivateRegistriesMap,
cpNodePlanMap,
c.UpdateWorkersOnly,
c.SystemImages.Alpine,
c.Certificates, c.UpgradeStrategy, c.NewHosts, inactiveHosts, c.MaxUnavailableForControlNodes)
if err != nil {
return "", fmt.Errorf("[controlPlane] Failed to upgrade Control Plane: %v", err)
}
return errMsgMaxUnavailableNotFailed, nil
}
func (c *Cluster) DeployWorkerPlane(ctx context.Context, svcOptionData map[string]*v3.KubernetesServicesOptions, reconcileCluster bool) (string, error) {
@@ -182,12 +212,16 @@ func (c *Cluster) DeployWorkerPlane(ctx context.Context, svcOptionData map[strin
// Deploy Worker plane
workerNodePlanMap := make(map[string]v3.RKEConfigNodePlan)
// Build cp node plan map
var notReadyHosts []*hosts.Host
allHosts := hosts.GetUniqueHostList(c.EtcdHosts, c.ControlPlaneHosts, c.WorkerHosts)
for _, host := range allHosts {
workerNodePlanMap[host.Address] = BuildRKEConfigNodePlan(ctx, c, host, host.DockerInfo, svcOptionData)
if host.IsControl || c.HostsLabeledToIgnoreUpgrade[host.Address] {
continue
}
if err := services.CheckNodeReady(kubeClient, host, services.WorkerRole); err != nil {
notReadyHosts = append(notReadyHosts, host)
}
if !host.IsEtcd {
// separating hosts with only worker role so they undergo upgrade in maxUnavailable batches
workerOnlyHosts = append(workerOnlyHosts, host)
@@ -211,12 +245,32 @@ func (c *Cluster) DeployWorkerPlane(ctx context.Context, svcOptionData map[strin
return "", nil
}
return c.UpgradeWorkerPlane(ctx, kubeClient, workerNodePlanMap, notReadyHosts, etcdAndWorkerHosts, workerOnlyHosts)
}
func (c *Cluster) UpgradeWorkerPlane(ctx context.Context, kubeClient *kubernetes.Clientset, workerNodePlanMap map[string]v3.RKEConfigNodePlan, notReadyHosts, etcdAndWorkerHosts, workerOnlyHosts []*hosts.Host) (string, error) {
inactiveHosts := make(map[string]bool)
var notReadyHostNames []string
for _, host := range c.InactiveHosts {
if !c.HostsLabeledToIgnoreUpgrade[host.Address] {
inactiveHosts[host.HostnameOverride] = true
}
}
for _, host := range notReadyHosts {
notReadyHostNames = append(notReadyHostNames, host.HostnameOverride)
}
// attempt upgrade on NotReady hosts without respecting maxUnavailable
logrus.Infof("Attempting upgrade of worker components on following hosts in NotReady status: %v", strings.Join(notReadyHostNames, ","))
services.RunWorkerPlane(ctx, notReadyHosts,
c.LocalConnDialerFactory,
c.PrivateRegistriesMap,
workerNodePlanMap,
c.Certificates,
c.UpdateWorkersOnly,
c.SystemImages.Alpine)
for _, host := range notReadyHosts {
services.CheckNodeReady(kubeClient, host, services.WorkerRole)
}
errMsgMaxUnavailableNotFailed, err := services.UpgradeWorkerPlaneForWorkerAndEtcdNodes(ctx, kubeClient, etcdAndWorkerHosts, workerOnlyHosts, inactiveHosts,
c.LocalConnDialerFactory,
c.PrivateRegistriesMap,