2017-10-29 09:45:21 +00:00
|
|
|
package services
|
|
|
|
|
|
|
|
import (
|
2018-01-09 22:10:56 +00:00
|
|
|
"context"
|
2020-02-14 17:40:23 +00:00
|
|
|
"fmt"
|
2020-02-04 19:27:52 +00:00
|
|
|
"strings"
|
|
|
|
"sync"
|
2018-01-09 22:10:56 +00:00
|
|
|
|
2020-02-04 19:27:52 +00:00
|
|
|
"github.com/docker/docker/client"
|
|
|
|
"github.com/rancher/rke/docker"
|
2017-10-29 09:45:21 +00:00
|
|
|
"github.com/rancher/rke/hosts"
|
2020-02-04 19:27:52 +00:00
|
|
|
"github.com/rancher/rke/k8s"
|
2018-01-09 22:10:56 +00:00
|
|
|
"github.com/rancher/rke/log"
|
2018-05-01 00:25:52 +00:00
|
|
|
"github.com/rancher/rke/pki"
|
2018-10-17 22:26:54 +00:00
|
|
|
"github.com/rancher/rke/util"
|
2019-07-03 22:18:33 +00:00
|
|
|
v3 "github.com/rancher/types/apis/management.cattle.io/v3"
|
2020-02-04 19:27:52 +00:00
|
|
|
"github.com/sirupsen/logrus"
|
2018-02-01 15:16:02 +00:00
|
|
|
"golang.org/x/sync/errgroup"
|
2020-02-04 19:27:52 +00:00
|
|
|
"k8s.io/client-go/kubernetes"
|
|
|
|
"k8s.io/kubectl/pkg/drain"
|
2017-10-29 09:45:21 +00:00
|
|
|
)
|
|
|
|
|
2018-05-01 00:25:52 +00:00
|
|
|
func RunControlPlane(ctx context.Context, controlHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, cpNodePlanMap map[string]v3.RKEConfigNodePlan, updateWorkersOnly bool, alpineImage string, certMap map[string]pki.CertificatePKI) error {
|
2018-10-17 22:26:54 +00:00
|
|
|
if updateWorkersOnly {
|
|
|
|
return nil
|
|
|
|
}
|
2018-01-09 22:10:56 +00:00
|
|
|
log.Infof(ctx, "[%s] Building up Controller Plane..", ControlRole)
|
2018-02-01 15:16:02 +00:00
|
|
|
var errgrp errgroup.Group
|
2018-10-17 22:26:54 +00:00
|
|
|
|
|
|
|
hostsQueue := util.GetObjectQueue(controlHosts)
|
|
|
|
for w := 0; w < WorkerThreads; w++ {
|
2018-02-01 15:16:02 +00:00
|
|
|
errgrp.Go(func() error {
|
2018-10-17 22:26:54 +00:00
|
|
|
var errList []error
|
|
|
|
for host := range hostsQueue {
|
|
|
|
runHost := host.(*hosts.Host)
|
|
|
|
err := doDeployControlHost(ctx, runHost, localConnDialerFactory, prsMap, cpNodePlanMap[runHost.Address].Processes, alpineImage, certMap)
|
|
|
|
if err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return util.ErrList(errList)
|
2018-02-01 15:16:02 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
if err := errgrp.Wait(); err != nil {
|
|
|
|
return err
|
2017-10-29 09:45:21 +00:00
|
|
|
}
|
2018-01-09 22:10:56 +00:00
|
|
|
log.Infof(ctx, "[%s] Successfully started Controller Plane..", ControlRole)
|
2017-10-29 09:45:21 +00:00
|
|
|
return nil
|
|
|
|
}
|
2017-11-15 02:54:26 +00:00
|
|
|
|
2020-02-14 17:40:23 +00:00
|
|
|
func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Clientset, controlHosts []*hosts.Host, localConnDialerFactory hosts.DialerFactory,
|
|
|
|
prsMap map[string]v3.PrivateRegistry, cpNodePlanMap map[string]v3.RKEConfigNodePlan, updateWorkersOnly bool, alpineImage string, certMap map[string]pki.CertificatePKI,
|
|
|
|
upgradeStrategy *v3.NodeUpgradeStrategy, newHosts, inactiveHosts map[string]bool) error {
|
2020-02-04 19:27:52 +00:00
|
|
|
if updateWorkersOnly {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
var drainHelper drain.Helper
|
|
|
|
|
2020-02-14 17:40:23 +00:00
|
|
|
log.Infof(ctx, "[%s] Processing controlplane hosts for upgrade one at a time", ControlRole)
|
2020-02-04 19:27:52 +00:00
|
|
|
if len(newHosts) > 0 {
|
|
|
|
var nodes []string
|
|
|
|
for _, host := range controlHosts {
|
|
|
|
if newHosts[host.HostnameOverride] {
|
|
|
|
nodes = append(nodes, host.HostnameOverride)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(nodes) > 0 {
|
|
|
|
log.Infof(ctx, "[%s] Adding controlplane nodes %v to the cluster", ControlRole, strings.Join(nodes, ","))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if upgradeStrategy.Drain {
|
|
|
|
drainHelper = getDrainHelper(kubeClient, *upgradeStrategy)
|
2020-02-14 17:40:23 +00:00
|
|
|
log.Infof(ctx, "[%s] Parameters provided to drain command: %#v", ControlRole, fmt.Sprintf("Force: %v, IgnoreAllDaemonSets: %v, DeleteLocalData: %v, Timeout: %v, GracePeriodSeconds: %v", drainHelper.Force, drainHelper.IgnoreAllDaemonSets, drainHelper.DeleteLocalData, drainHelper.Timeout, drainHelper.GracePeriodSeconds))
|
|
|
|
}
|
|
|
|
|
|
|
|
currentHostsPool := make(map[string]bool)
|
|
|
|
for _, host := range controlHosts {
|
|
|
|
currentHostsPool[host.HostnameOverride] = true
|
2020-02-04 19:27:52 +00:00
|
|
|
}
|
|
|
|
// upgrade control plane hosts one at a time for zero downtime upgrades
|
|
|
|
for _, host := range controlHosts {
|
|
|
|
log.Infof(ctx, "Processing controlplane host %v", host.HostnameOverride)
|
|
|
|
if newHosts[host.HostnameOverride] {
|
|
|
|
if err := doDeployControlHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, alpineImage, certMap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
if err := doDeployWorkerPlaneHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, certMap, updateWorkersOnly, alpineImage); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-04 19:27:52 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
nodes, err := getNodeListForUpgrade(kubeClient, &sync.Map{}, newHosts, inactiveHosts)
|
2020-02-04 19:27:52 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
var maxUnavailableHit bool
|
2020-02-25 01:56:02 +00:00
|
|
|
var nodeNotReady string
|
2020-02-04 19:27:52 +00:00
|
|
|
for _, node := range nodes {
|
|
|
|
// in case any previously added nodes or till now unprocessed nodes become unreachable during upgrade
|
2020-02-14 17:40:23 +00:00
|
|
|
if !k8s.IsNodeReady(node) && currentHostsPool[node.Labels[k8s.HostnameLabel]] {
|
2020-02-04 19:27:52 +00:00
|
|
|
maxUnavailableHit = true
|
2020-02-25 01:56:02 +00:00
|
|
|
nodeNotReady = node.Labels[k8s.HostnameLabel]
|
2020-02-04 19:27:52 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if maxUnavailableHit {
|
2020-02-25 01:56:02 +00:00
|
|
|
return fmt.Errorf("maxUnavailable limit hit for controlplane since node %v is in NotReady state", nodeNotReady)
|
2020-02-04 19:27:52 +00:00
|
|
|
}
|
|
|
|
|
2020-02-14 17:40:23 +00:00
|
|
|
controlPlaneUpgradable, err := isControlPlaneHostUpgradable(ctx, host, cpNodePlanMap[host.Address].Processes)
|
2020-02-04 19:27:52 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
workerPlaneUpgradable, err := isWorkerHostUpgradable(ctx, host, cpNodePlanMap[host.Address].Processes)
|
|
|
|
if err != nil {
|
2020-02-04 19:27:52 +00:00
|
|
|
return err
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
if !controlPlaneUpgradable && !workerPlaneUpgradable {
|
|
|
|
log.Infof(ctx, "Upgrade not required for controlplane and worker components of host %v", host.HostnameOverride)
|
|
|
|
continue
|
2020-02-04 19:27:52 +00:00
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
if err := upgradeControlHost(ctx, kubeClient, host, upgradeStrategy.Drain, drainHelper, localConnDialerFactory, prsMap, cpNodePlanMap, updateWorkersOnly, alpineImage, certMap, controlPlaneUpgradable, workerPlaneUpgradable); err != nil {
|
2020-02-04 19:27:52 +00:00
|
|
|
return err
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
}
|
|
|
|
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func upgradeControlHost(ctx context.Context, kubeClient *kubernetes.Clientset, host *hosts.Host, drain bool, drainHelper drain.Helper,
|
|
|
|
localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, cpNodePlanMap map[string]v3.RKEConfigNodePlan, updateWorkersOnly bool,
|
|
|
|
alpineImage string, certMap map[string]pki.CertificatePKI, controlPlaneUpgradable, workerPlaneUpgradable bool) error {
|
|
|
|
if err := checkNodeReady(kubeClient, host, ControlRole); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := cordonAndDrainNode(kubeClient, host, drain, drainHelper, ControlRole); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if controlPlaneUpgradable {
|
|
|
|
log.Infof(ctx, "Upgrading controlplane components for control host %v", host.HostnameOverride)
|
|
|
|
if err := doDeployControlHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, alpineImage, certMap); err != nil {
|
2020-02-04 19:27:52 +00:00
|
|
|
return err
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
}
|
|
|
|
if workerPlaneUpgradable {
|
|
|
|
log.Infof(ctx, "Upgrading workerplane components for control host %v", host.HostnameOverride)
|
|
|
|
if err := doDeployWorkerPlaneHost(ctx, host, localConnDialerFactory, prsMap, cpNodePlanMap[host.Address].Processes, certMap, updateWorkersOnly, alpineImage); err != nil {
|
2020-02-04 19:27:52 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2020-02-14 17:40:23 +00:00
|
|
|
|
|
|
|
if err := checkNodeReady(kubeClient, host, ControlRole); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := k8s.CordonUncordon(kubeClient, host.HostnameOverride, false); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-04 19:27:52 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-01-09 22:10:56 +00:00
|
|
|
func RemoveControlPlane(ctx context.Context, controlHosts []*hosts.Host, force bool) error {
|
|
|
|
log.Infof(ctx, "[%s] Tearing down the Controller Plane..", ControlRole)
|
2018-10-23 23:38:00 +00:00
|
|
|
var errgrp errgroup.Group
|
|
|
|
hostsQueue := util.GetObjectQueue(controlHosts)
|
|
|
|
for w := 0; w < WorkerThreads; w++ {
|
|
|
|
errgrp.Go(func() error {
|
|
|
|
var errList []error
|
|
|
|
for host := range hostsQueue {
|
|
|
|
runHost := host.(*hosts.Host)
|
|
|
|
if err := removeKubeAPI(ctx, runHost); err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
if err := removeKubeController(ctx, runHost); err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
if err := removeScheduler(ctx, runHost); err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
// force is true in remove, false in reconcile
|
2019-07-03 22:18:33 +00:00
|
|
|
if !runHost.IsWorker || !runHost.IsEtcd || force {
|
2018-10-23 23:38:00 +00:00
|
|
|
if err := removeKubelet(ctx, runHost); err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
if err := removeKubeproxy(ctx, runHost); err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
if err := removeSidekick(ctx, runHost); err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
}
|
2017-12-08 23:05:55 +00:00
|
|
|
}
|
2018-10-23 23:38:00 +00:00
|
|
|
return util.ErrList(errList)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := errgrp.Wait(); err != nil {
|
|
|
|
return err
|
2017-11-20 18:08:50 +00:00
|
|
|
}
|
2018-10-23 23:38:00 +00:00
|
|
|
|
2018-01-23 23:02:22 +00:00
|
|
|
log.Infof(ctx, "[%s] Successfully tore down Controller Plane..", ControlRole)
|
2017-11-20 18:08:50 +00:00
|
|
|
return nil
|
|
|
|
}
|
2018-02-01 15:16:02 +00:00
|
|
|
|
2018-08-20 04:37:04 +00:00
|
|
|
func RestartControlPlane(ctx context.Context, controlHosts []*hosts.Host) error {
|
|
|
|
log.Infof(ctx, "[%s] Restarting the Controller Plane..", ControlRole)
|
|
|
|
var errgrp errgroup.Group
|
|
|
|
|
|
|
|
hostsQueue := util.GetObjectQueue(controlHosts)
|
|
|
|
for w := 0; w < WorkerThreads; w++ {
|
|
|
|
errgrp.Go(func() error {
|
|
|
|
var errList []error
|
|
|
|
for host := range hostsQueue {
|
|
|
|
runHost := host.(*hosts.Host)
|
|
|
|
// restart KubeAPI
|
2019-01-14 17:51:20 +00:00
|
|
|
if err := RestartKubeAPI(ctx, runHost); err != nil {
|
2018-08-20 04:37:04 +00:00
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// restart KubeController
|
2019-01-14 17:51:20 +00:00
|
|
|
if err := RestartKubeController(ctx, runHost); err != nil {
|
2018-08-20 04:37:04 +00:00
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// restart scheduler
|
2019-01-14 17:51:20 +00:00
|
|
|
err := RestartScheduler(ctx, runHost)
|
2018-08-20 04:37:04 +00:00
|
|
|
if err != nil {
|
|
|
|
errList = append(errList, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return util.ErrList(errList)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
if err := errgrp.Wait(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
log.Infof(ctx, "[%s] Successfully restarted Controller Plane..", ControlRole)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-05-01 00:25:52 +00:00
|
|
|
func doDeployControlHost(ctx context.Context, host *hosts.Host, localConnDialerFactory hosts.DialerFactory, prsMap map[string]v3.PrivateRegistry, processMap map[string]v3.Process, alpineImage string, certMap map[string]pki.CertificatePKI) error {
|
2018-02-01 15:16:02 +00:00
|
|
|
if host.IsWorker {
|
|
|
|
if err := removeNginxProxy(ctx, host); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// run sidekick
|
2018-02-13 00:47:56 +00:00
|
|
|
if err := runSidekick(ctx, host, prsMap, processMap[SidekickContainerName]); err != nil {
|
2018-02-01 15:16:02 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
// run kubeapi
|
2018-05-01 00:25:52 +00:00
|
|
|
if err := runKubeAPI(ctx, host, localConnDialerFactory, prsMap, processMap[KubeAPIContainerName], alpineImage, certMap); err != nil {
|
2018-02-01 15:16:02 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
// run kubecontroller
|
2018-03-21 17:20:58 +00:00
|
|
|
if err := runKubeController(ctx, host, localConnDialerFactory, prsMap, processMap[KubeControllerContainerName], alpineImage); err != nil {
|
2018-02-01 15:16:02 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
// run scheduler
|
2018-03-21 17:20:58 +00:00
|
|
|
return runScheduler(ctx, host, localConnDialerFactory, prsMap, processMap[SchedulerContainerName], alpineImage)
|
2018-02-01 15:16:02 +00:00
|
|
|
}
|
2020-02-04 19:27:52 +00:00
|
|
|
|
|
|
|
func isControlPlaneHostUpgradable(ctx context.Context, host *hosts.Host, processMap map[string]v3.Process) (bool, error) {
|
|
|
|
for _, service := range []string{SidekickContainerName, KubeAPIContainerName, KubeControllerContainerName, SchedulerContainerName} {
|
|
|
|
process := processMap[service]
|
|
|
|
imageCfg, hostCfg, _ := GetProcessConfig(process, host)
|
|
|
|
upgradable, err := docker.IsContainerUpgradable(ctx, host.DClient, imageCfg, hostCfg, service, host.Address, ControlRole)
|
|
|
|
if err != nil {
|
|
|
|
if client.IsErrNotFound(err) {
|
|
|
|
// doDeployControlHost should be called so this container gets recreated
|
|
|
|
logrus.Debugf("[%s] Host %v is upgradable because %v needs to run", ControlRole, host.HostnameOverride, service)
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
if upgradable {
|
|
|
|
logrus.Debugf("[%s] Host %v is upgradable because %v has changed", ControlRole, host.HostnameOverride, service)
|
|
|
|
// host upgradable even if a single service is upgradable
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
logrus.Debugf("[%s] Host %v is not upgradable", ControlRole, host.HostnameOverride)
|
|
|
|
return false, nil
|
|
|
|
}
|