mirror of https://github.com/rancher/rke.git synced 2025-05-12 18:38:11 +00:00

Critical and non-critical addons

Configurable addon job timeout
moelsayed 2018-05-07 23:51:09 +02:00
parent 47ddb6ee41
commit 9addf796a2
8 changed files with 97 additions and 38 deletions

View File

@ -49,7 +49,7 @@ nodes:
## Network Plugins
RKE supports the following network plugins:
RKE supports the following network plugins that are deployed as addons:
- Flannel
- Calico
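The plugin to deploy is selected in `cluster.yml` under the `network` key; a minimal illustrative fragment (the plugin name here is just an example):
```yaml
# Illustrative cluster.yml fragment -- pick one of the supported plugins above.
network:
  plugin: flannel
```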
@ -120,7 +120,12 @@ The following images are no longer required, and can be replaced by `rancher/rke
## Addons
RKE supports pluggable addons on cluster bootstrap, user can specify the addon yaml in the cluster.yml file, and when running
RKE supports pluggable addons. Addons are used to deploy several cluster components including:
- Network plugin
- KubeDNS
- Ingress controller
In addition, a user can specify the addon yaml in the cluster.yml file, and when running
```yaml
rke up --config cluster.yml
```
@ -128,7 +133,7 @@ rke up --config cluster.yml
RKE will deploy the addons yaml after the cluster starts. RKE first uploads this yaml file as a ConfigMap in the Kubernetes cluster and then runs a Kubernetes job that mounts this ConfigMap and deploys the addons.
> Note that RKE doesn't support yet removal of the addons, so once they are deployed the first time you can't change them using rke
> Note that RKE doesn't yet support removal or update of the addons, so once they are deployed the first time you can't change them using rke
To start using addons, use the `addons:` option in the `cluster.yml` file, for example:
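A minimal illustrative `addons` block (the manifest below is a stand-in for whatever Kubernetes YAML you want deployed, not the example from the original README):
```yaml
addons: |-
    ---
    apiVersion: v1
    kind: Namespace
    metadata:
      name: example-addon
```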
@ -159,6 +164,17 @@ addons_include:
- ./nginx.yaml
```
#### Addon deployment jobs
RKE uses Kubernetes Jobs to deploy addons. In some cases, addon deployment takes longer than expected. Starting with version `0.1.7-rc1`, RKE provides an option to control the job check timeout in seconds:
```yaml
addon_job_timeout: 30
```
#### Critical and non-critical addons
As of version `0.1.7-rc1`, addons are split into two categories: critical and non-critical.
Critical addons will cause RKE to error out if they fail to deploy for any reason, while non-critical addons will only log a warning and continue with the deployment. Currently, only the network plugin is considered critical.
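That distinction is implemented through the `addonError` type introduced in this commit (see the Go changes below); a condensed, self-contained sketch of the pattern, with names mirroring the commit but not taken verbatim from RKE:
```go
package main

import (
	"fmt"
	"log"
)

// addonError mirrors the type added in this commit: an error message plus a
// flag marking whether the failed addon is critical.
type addonError struct {
	err        string
	isCritical bool
}

func (e *addonError) Error() string { return e.err }

// handleAddonErr shows how the callers below react: a critical failure aborts
// the run, anything else is logged as a warning and provisioning continues.
func handleAddonErr(err error) error {
	if aerr, ok := err.(*addonError); ok && aerr.isCritical {
		return err
	}
	log.Printf("[addons] warning, addon failed to deploy: %v", err)
	return nil
}

func main() {
	// The network plugin is deployed with isCritical=true; everything else
	// (KubeDNS, ingress, user addons) with false.
	fmt.Println(handleAddonErr(&addonError{"network plugin failed", true}))
	fmt.Println(handleAddonErr(&addonError{"user addon failed", false}))
}
```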
## High Availability
RKE is HA ready: you can specify more than one controlplane host in the `cluster.yml` file, and RKE will deploy the master components on all of them. The kubelets are configured to connect to `127.0.0.1:6443` by default, which is the address of the `nginx-proxy` service that proxies requests to all master nodes.
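Configuring HA is just a matter of listing more than one node with the `controlplane` role in `cluster.yml`; an illustrative fragment (addresses and SSH user are placeholders):
```yaml
nodes:
  - address: 10.0.0.1
    user: ubuntu
    role: [controlplane, etcd]
  - address: 10.0.0.2
    user: ubuntu
    role: [controlplane, etcd]
  - address: 10.0.0.3
    user: ubuntu
    role: [worker]
```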

View File

@ -156,6 +156,8 @@ ignore_docker_version: false
kubernetes_version: v1.10.1
# Addons are deployed using Kubernetes jobs. RKE will give up on trying to get the job status after this timeout in seconds.
addon_job_timeout: 30
# If set, this is the cluster name that will be used in the kube config file
# Default value is "local"
cluster_name: mycluster

View File

@ -36,17 +36,36 @@ type ingressOptions struct {
IngressBackend string
}
type addonError struct {
err string
isCritical bool
}
func (e *addonError) Error() string {
return e.err
}
func (c *Cluster) deployK8sAddOns(ctx context.Context) error {
if err := c.deployKubeDNS(ctx); err != nil {
return err
if err, ok := err.(*addonError); ok && err.isCritical {
return err
}
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", KubeDNSAddonResourceName, err)
}
return c.deployIngress(ctx)
if err := c.deployIngress(ctx); err != nil {
if err, ok := err.(*addonError); ok && err.isCritical {
return err
}
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", IngressAddonResourceName, err)
}
return nil
}
func (c *Cluster) deployUserAddOns(ctx context.Context) error {
log.Infof(ctx, "[addons] Setting up user addons")
if c.Addons != "" {
if err := c.doAddonDeploy(ctx, c.Addons, UserAddonResourceName); err != nil {
if err := c.doAddonDeploy(ctx, c.Addons, UserAddonResourceName, false); err != nil {
return err
}
}
@ -108,7 +127,7 @@ func (c *Cluster) deployAddonsInclude(ctx context.Context) error {
log.Infof(ctx, "[addons] Deploying %s", UserAddonsIncludeResourceName)
logrus.Debugf("[addons] Compiled addons yaml: %s", string(manifests))
return c.doAddonDeploy(ctx, string(manifests), UserAddonsIncludeResourceName)
return c.doAddonDeploy(ctx, string(manifests), UserAddonsIncludeResourceName, false)
}
func validateUserAddonYAML(addon []byte) error {
@ -158,7 +177,7 @@ func (c *Cluster) deployKubeDNS(ctx context.Context) error {
if err != nil {
return err
}
if err := c.doAddonDeploy(ctx, kubeDNSYaml, KubeDNSAddonResourceName); err != nil {
if err := c.doAddonDeploy(ctx, kubeDNSYaml, KubeDNSAddonResourceName, false); err != nil {
return err
}
log.Infof(ctx, "[addons] KubeDNS deployed successfully..")
@ -174,33 +193,35 @@ func (c *Cluster) deployWithKubectl(ctx context.Context, addonYaml string) error
return cmd.Run()
}
func (c *Cluster) doAddonDeploy(ctx context.Context, addonYaml, resourceName string) error {
func (c *Cluster) doAddonDeploy(ctx context.Context, addonYaml, resourceName string, isCritical bool) error {
if c.UseKubectlDeploy {
return c.deployWithKubectl(ctx, addonYaml)
if err := c.deployWithKubectl(ctx, addonYaml); err != nil {
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
}
err := c.StoreAddonConfigMap(ctx, addonYaml, resourceName)
if err != nil {
return fmt.Errorf("Failed to save addon ConfigMap: %v", err)
return &addonError{fmt.Sprintf("Failed to save addon ConfigMap: %v", err), isCritical}
}
log.Infof(ctx, "[addons] Executing deploy job..")
k8sClient, err := k8s.NewClient(c.LocalKubeConfigPath, c.K8sWrapTransport)
if err != nil {
return err
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride)
if err != nil {
return fmt.Errorf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err)
return &addonError{fmt.Sprintf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err), isCritical}
}
addonJob, err := addons.GetAddonsExcuteJob(resourceName, node.Name, c.Services.KubeAPI.Image)
if err != nil {
return fmt.Errorf("Failed to deploy addon execute job: %v", err)
return &addonError{fmt.Sprintf("Failed to generate addon execute job: %v", err), isCritical}
}
err = c.ApplySystemAddonExcuteJob(addonJob)
if err != nil {
return fmt.Errorf("Failed to deploy addon execute job: %v", err)
return &addonError{fmt.Sprintf("%v", err), isCritical}
}
return nil
}
@ -234,7 +255,7 @@ func (c *Cluster) StoreAddonConfigMap(ctx context.Context, addonYaml string, add
}
func (c *Cluster) ApplySystemAddonExcuteJob(addonJob string) error {
if err := k8s.ApplyK8sSystemJob(addonJob, c.LocalKubeConfigPath, c.K8sWrapTransport); err != nil {
if err := k8s.ApplyK8sSystemJob(addonJob, c.LocalKubeConfigPath, c.K8sWrapTransport, c.AddonJobTimeout); err != nil {
logrus.Error(err)
return err
}
@ -261,7 +282,7 @@ func (c *Cluster) deployIngress(ctx context.Context) error {
if err != nil {
return err
}
if err := c.doAddonDeploy(ctx, ingressYaml, IngressAddonResourceName); err != nil {
if err := c.doAddonDeploy(ctx, ingressYaml, IngressAddonResourceName, false); err != nil {
return err
}
log.Infof(ctx, "[ingress] ingress controller %s is successfully deployed", c.Ingress.Provider)

View File

@ -289,7 +289,14 @@ func (c *Cluster) deployAddons(ctx context.Context) error {
if err := c.deployK8sAddOns(ctx); err != nil {
return err
}
return c.deployUserAddOns(ctx)
if err := c.deployUserAddOns(ctx); err != nil {
if err, ok := err.(*addonError); ok && err.isCritical {
return err
}
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", UserAddonsIncludeResourceName, err)
}
return nil
}
func (c *Cluster) SyncLabelsAndTaints(ctx context.Context) error {
@ -349,9 +356,14 @@ func ConfigureCluster(
if len(kubeCluster.ControlPlaneHosts) > 0 {
kubeCluster.Certificates = crtBundle
if err := kubeCluster.deployNetworkPlugin(ctx); err != nil {
if err, ok := err.(*addonError); ok && err.isCritical {
return err
}
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", NetworkPluginResourceName, err)
}
if err := kubeCluster.deployAddons(ctx); err != nil {
return err
}
return kubeCluster.deployAddons(ctx)
}
return nil
}

View File

@ -3,6 +3,7 @@ package cluster
import (
"context"
"github.com/rancher/rke/k8s"
"github.com/rancher/rke/log"
"github.com/rancher/rke/services"
"github.com/rancher/types/apis/management.cattle.io/v3"
@ -87,6 +88,9 @@ func (c *Cluster) setClusterDefaults(ctx context.Context) {
if len(c.Version) == 0 {
c.Version = DefaultK8sVersion
}
if c.AddonJobTimeout == 0 {
c.AddonJobTimeout = k8s.DefaultTimeout
}
c.setClusterImageDefaults()
c.setClusterServicesDefaults()
c.setClusterNetworkDefaults()

View File

@ -41,26 +41,15 @@ const (
ProtocolUDP = "UDP"
FlannelNetworkPlugin = "flannel"
FlannelImage = "flannel_image"
FlannelCNIImage = "flannel_cni_image"
FlannelIface = "flannel_iface"
CalicoNetworkPlugin = "calico"
CalicoNodeImage = "calico_node_image"
CalicoCNIImage = "calico_cni_image"
CalicoControllersImage = "calico_controllers_image"
CalicoctlImage = "calicoctl_image"
CalicoCloudProvider = "calico_cloud_provider"
CalicoNetworkPlugin = "calico"
CalicoCloudProvider = "calico_cloud_provider"
CanalNetworkPlugin = "canal"
CanalNodeImage = "canal_node_image"
CanalCNIImage = "canal_cni_image"
CanalFlannelImage = "canal_flannel_image"
CanalIface = "canal_iface"
WeaveNetworkPlugin = "weave"
WeaveImage = "weave_node_image"
WeaveCNIImage = "weave_cni_image"
// List of map keys to be used with network templates
@ -140,7 +129,7 @@ func (c *Cluster) doFlannelDeploy(ctx context.Context) error {
if err != nil {
return err
}
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
}
func (c *Cluster) doCalicoDeploy(ctx context.Context) error {
@ -158,7 +147,7 @@ func (c *Cluster) doCalicoDeploy(ctx context.Context) error {
if err != nil {
return err
}
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
}
func (c *Cluster) doCanalDeploy(ctx context.Context) error {
@ -180,7 +169,7 @@ func (c *Cluster) doCanalDeploy(ctx context.Context) error {
if err != nil {
return err
}
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
}
func (c *Cluster) doWeaveDeploy(ctx context.Context) error {
@ -194,7 +183,7 @@ func (c *Cluster) doWeaveDeploy(ctx context.Context) error {
if err != nil {
return err
}
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
}
func (c *Cluster) getNetworkPluginManifest(pluginConfig map[string]string) (string, error) {

View File

@ -12,7 +12,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport WrapTransport) error {
func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport WrapTransport, timeout int) error {
job := v1.Job{}
if err := decodeYamlResource(&job, jobYaml); err != nil {
return err
@ -32,7 +32,7 @@ func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport WrapTran
return err
}
logrus.Debugf("[k8s] waiting for job %s to complete..", job.Name)
return retryTo(ensureJobCompleted, k8sClient, job, DefaultRetries, DefaultSleepSeconds)
return retryToWithTimeout(ensureJobCompleted, k8sClient, job, timeout)
}
func ensureJobCompleted(k8sClient *kubernetes.Clientset, j interface{}) error {

View File

@ -13,6 +13,7 @@ import (
const (
DefaultRetries = 5
DefaultSleepSeconds = 5
DefaultTimeout = 30
K8sWrapTransportTimeout = 30
)
@ -42,6 +43,20 @@ func decodeYamlResource(resource interface{}, yamlManifest string) error {
return decoder.Decode(&resource)
}
func retryToWithTimeout(runFunc k8sCall, k8sClient *kubernetes.Clientset, resource interface{}, timeout int) error {
var err error
timePassed := 0
for timePassed < timeout {
if err = runFunc(k8sClient, resource); err != nil {
time.Sleep(time.Second * time.Duration(DefaultSleepSeconds))
timePassed += DefaultSleepSeconds
continue
}
return nil
}
return err
}
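With the defaults above (`DefaultTimeout = 30`, `DefaultSleepSeconds = 5`) the job status is checked at most 30 / 5 = 6 times before RKE gives up. A self-contained sketch of the same polling pattern, independent of the rke packages and not the code itself:
```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntil retries run every sleepSeconds until it succeeds or roughly
// timeout seconds have elapsed, returning the last error on failure.
// It mirrors the shape of retryToWithTimeout above, but is only a sketch.
func pollUntil(run func() error, timeout, sleepSeconds int) error {
	var err error
	for passed := 0; passed < timeout; passed += sleepSeconds {
		if err = run(); err == nil {
			return nil
		}
		time.Sleep(time.Duration(sleepSeconds) * time.Second)
	}
	return err
}

func main() {
	attempts := 0
	err := pollUntil(func() error {
		attempts++
		if attempts < 3 {
			return errors.New("job not complete yet")
		}
		return nil
	}, 30, 5)
	fmt.Println(attempts, err) // succeeds on the third check
}
```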
func retryTo(runFunc k8sCall, k8sClient *kubernetes.Clientset, resource interface{}, retries, sleepSeconds int) error {
var err error
if retries == 0 {