mirror of
https://github.com/rancher/rke.git
synced 2025-05-12 18:38:11 +00:00
Critical and non-critical addons
Configurable addon job timeout
This commit is contained in:
parent
47ddb6ee41
commit
9addf796a2
22
README.md
22
README.md
@ -49,7 +49,7 @@ nodes:
|
||||
|
||||
## Network Plugins
|
||||
|
||||
RKE supports the following network plugins:
|
||||
RKE supports the following network plugins that are deployed as addons:
|
||||
|
||||
- Flannel
|
||||
- Calico
|
||||
@ -120,7 +120,12 @@ The following images are no longer required, and can be replaced by `rancher/rke
|
||||
|
||||
## Addons
|
||||
|
||||
RKE supports pluggable addons on cluster bootstrap, user can specify the addon yaml in the cluster.yml file, and when running
|
||||
RKE supports pluggable addons. Addons are used to deploy several cluster components including:
|
||||
- Network plugin
|
||||
- KubeDNS
|
||||
- Ingress controller
|
||||
|
||||
In addition, a user can specify the addon yaml in the cluster.yml file, and when running
|
||||
|
||||
```yaml
|
||||
rke up --config cluster.yml
|
||||
@ -128,7 +133,7 @@ rke up --config cluster.yml
|
||||
|
||||
RKE will deploy the addons yaml after the cluster starts. RKE first uploads this yaml file as a configmap in the kubernetes cluster and then runs a kubernetes job that mounts this config map and deploys the addons.
|
||||
|
||||
> Note that RKE doesn't support yet removal of the addons, so once they are deployed the first time you can't change them using rke
|
||||
> Note that RKE doesn't yet support removal or update of the addons, so once they are deployed the first time you can't change them using rke
|
||||
|
||||
To start using addons use `addons:` option in the `cluster.yml` file for example:
|
||||
|
||||
@ -159,6 +164,17 @@ addons_include:
|
||||
- ./nginx.yaml
|
||||
```
|
||||
|
||||
#### Addon deployment jobs
|
||||
|
||||
RKE uses kubernetes Jobs to deploy addons. In some cases, addons deployment takes longer than expected. Starting with version `0.1.7-rc1`, RKE provides an option to control the job check timeout in seconds:
|
||||
```yaml
|
||||
addon_job_timeout: 30
|
||||
```
|
||||
|
||||
#### Critical and uncritical addons
|
||||
As of version `0.1.7-rc1`, addons are split into two categories: critical and uncritical.
|
||||
|
||||
Critical addons will cause RKE to error out if they fail to deploy for any reason, while uncritical addons will just log a warning and continue with the deployment. Currently only the network plugin is considered critical.
|
||||
## High Availability
|
||||
|
||||
RKE is HA ready: you can specify more than one controlplane host in the `cluster.yml` file, and rke will deploy master components on all of them. The kubelets are configured to connect to `127.0.0.1:6443` by default, which is the address of the `nginx-proxy` service that proxies requests to all master nodes.
|
||||
|
@ -156,6 +156,8 @@ ignore_docker_version: false
|
||||
|
||||
kubernetes_version: v1.10.1
|
||||
|
||||
# addons are deployed using kubernetes jobs. RKE will give up on trying to get the job status after this timeout in seconds.
|
||||
addon_job_timeout: 30
|
||||
# If set, this is the cluster name that will be used in the kube config file
|
||||
# Default value is "local"
|
||||
cluster_name: mycluster
|
||||
|
@ -36,17 +36,36 @@ type ingressOptions struct {
|
||||
IngressBackend string
|
||||
}
|
||||
|
||||
// addonError is the error value produced when deploying an addon fails. It
// carries the failure message together with a flag that callers check to
// decide between returning the error and merely logging a warning.
type addonError struct {
|
||||
// err is the failure message returned by Error.
err string
|
||||
// isCritical marks a failure that callers return instead of logging as a warning.
isCritical bool
|
||||
}
|
||||
|
||||
// Error returns the stored failure message, so *addonError can be used as an error.
func (e *addonError) Error() string {
|
||||
return e.err
|
||||
}
|
||||
|
||||
func (c *Cluster) deployK8sAddOns(ctx context.Context) error {
|
||||
if err := c.deployKubeDNS(ctx); err != nil {
|
||||
return err
|
||||
if err, ok := err.(*addonError); ok && err.isCritical {
|
||||
return err
|
||||
}
|
||||
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", KubeDNSAddonResourceName, err)
|
||||
}
|
||||
return c.deployIngress(ctx)
|
||||
if err := c.deployIngress(ctx); err != nil {
|
||||
if err, ok := err.(*addonError); ok && err.isCritical {
|
||||
return err
|
||||
}
|
||||
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", IngressAddonResourceName, err)
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cluster) deployUserAddOns(ctx context.Context) error {
|
||||
log.Infof(ctx, "[addons] Setting up user addons")
|
||||
if c.Addons != "" {
|
||||
if err := c.doAddonDeploy(ctx, c.Addons, UserAddonResourceName); err != nil {
|
||||
if err := c.doAddonDeploy(ctx, c.Addons, UserAddonResourceName, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -108,7 +127,7 @@ func (c *Cluster) deployAddonsInclude(ctx context.Context) error {
|
||||
log.Infof(ctx, "[addons] Deploying %s", UserAddonsIncludeResourceName)
|
||||
logrus.Debugf("[addons] Compiled addons yaml: %s", string(manifests))
|
||||
|
||||
return c.doAddonDeploy(ctx, string(manifests), UserAddonsIncludeResourceName)
|
||||
return c.doAddonDeploy(ctx, string(manifests), UserAddonsIncludeResourceName, false)
|
||||
}
|
||||
|
||||
func validateUserAddonYAML(addon []byte) error {
|
||||
@ -158,7 +177,7 @@ func (c *Cluster) deployKubeDNS(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.doAddonDeploy(ctx, kubeDNSYaml, KubeDNSAddonResourceName); err != nil {
|
||||
if err := c.doAddonDeploy(ctx, kubeDNSYaml, KubeDNSAddonResourceName, false); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Infof(ctx, "[addons] KubeDNS deployed successfully..")
|
||||
@ -174,33 +193,35 @@ func (c *Cluster) deployWithKubectl(ctx context.Context, addonYaml string) error
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
func (c *Cluster) doAddonDeploy(ctx context.Context, addonYaml, resourceName string) error {
|
||||
func (c *Cluster) doAddonDeploy(ctx context.Context, addonYaml, resourceName string, isCritical bool) error {
|
||||
if c.UseKubectlDeploy {
|
||||
return c.deployWithKubectl(ctx, addonYaml)
|
||||
if err := c.deployWithKubectl(ctx, addonYaml); err != nil {
|
||||
return &addonError{fmt.Sprintf("%v", err), isCritical}
|
||||
}
|
||||
}
|
||||
|
||||
err := c.StoreAddonConfigMap(ctx, addonYaml, resourceName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to save addon ConfigMap: %v", err)
|
||||
return &addonError{fmt.Sprintf("Failed to save addon ConfigMap: %v", err), isCritical}
|
||||
}
|
||||
|
||||
log.Infof(ctx, "[addons] Executing deploy job..")
|
||||
k8sClient, err := k8s.NewClient(c.LocalKubeConfigPath, c.K8sWrapTransport)
|
||||
if err != nil {
|
||||
return err
|
||||
return &addonError{fmt.Sprintf("%v", err), isCritical}
|
||||
}
|
||||
node, err := k8s.GetNode(k8sClient, c.ControlPlaneHosts[0].HostnameOverride)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err)
|
||||
return &addonError{fmt.Sprintf("Failed to get Node [%s]: %v", c.ControlPlaneHosts[0].HostnameOverride, err), isCritical}
|
||||
}
|
||||
addonJob, err := addons.GetAddonsExcuteJob(resourceName, node.Name, c.Services.KubeAPI.Image)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to deploy addon execute job: %v", err)
|
||||
return &addonError{fmt.Sprintf("Failed to generate addon execute job: %v", err), isCritical}
|
||||
}
|
||||
err = c.ApplySystemAddonExcuteJob(addonJob)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to deploy addon execute job: %v", err)
|
||||
return &addonError{fmt.Sprintf("%v", err), isCritical}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -234,7 +255,7 @@ func (c *Cluster) StoreAddonConfigMap(ctx context.Context, addonYaml string, add
|
||||
}
|
||||
|
||||
func (c *Cluster) ApplySystemAddonExcuteJob(addonJob string) error {
|
||||
if err := k8s.ApplyK8sSystemJob(addonJob, c.LocalKubeConfigPath, c.K8sWrapTransport); err != nil {
|
||||
if err := k8s.ApplyK8sSystemJob(addonJob, c.LocalKubeConfigPath, c.K8sWrapTransport, c.AddonJobTimeout); err != nil {
|
||||
logrus.Error(err)
|
||||
return err
|
||||
}
|
||||
@ -261,7 +282,7 @@ func (c *Cluster) deployIngress(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.doAddonDeploy(ctx, ingressYaml, IngressAddonResourceName); err != nil {
|
||||
if err := c.doAddonDeploy(ctx, ingressYaml, IngressAddonResourceName, false); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Infof(ctx, "[ingress] ingress controller %s is successfully deployed", c.Ingress.Provider)
|
||||
|
@ -289,7 +289,14 @@ func (c *Cluster) deployAddons(ctx context.Context) error {
|
||||
if err := c.deployK8sAddOns(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.deployUserAddOns(ctx)
|
||||
if err := c.deployUserAddOns(ctx); err != nil {
|
||||
if err, ok := err.(*addonError); ok && err.isCritical {
|
||||
return err
|
||||
}
|
||||
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", UserAddonsIncludeResourceName, err)
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cluster) SyncLabelsAndTaints(ctx context.Context) error {
|
||||
@ -349,9 +356,14 @@ func ConfigureCluster(
|
||||
if len(kubeCluster.ControlPlaneHosts) > 0 {
|
||||
kubeCluster.Certificates = crtBundle
|
||||
if err := kubeCluster.deployNetworkPlugin(ctx); err != nil {
|
||||
if err, ok := err.(*addonError); ok && err.isCritical {
|
||||
return err
|
||||
}
|
||||
log.Warnf(ctx, "Failed to deploy addon execute job [%s]: %v", NetworkPluginResourceName, err)
|
||||
}
|
||||
if err := kubeCluster.deployAddons(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return kubeCluster.deployAddons(ctx)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package cluster
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/rancher/rke/k8s"
|
||||
"github.com/rancher/rke/log"
|
||||
"github.com/rancher/rke/services"
|
||||
"github.com/rancher/types/apis/management.cattle.io/v3"
|
||||
@ -87,6 +88,9 @@ func (c *Cluster) setClusterDefaults(ctx context.Context) {
|
||||
if len(c.Version) == 0 {
|
||||
c.Version = DefaultK8sVersion
|
||||
}
|
||||
if c.AddonJobTimeout == 0 {
|
||||
c.AddonJobTimeout = k8s.DefaultTimeout
|
||||
}
|
||||
c.setClusterImageDefaults()
|
||||
c.setClusterServicesDefaults()
|
||||
c.setClusterNetworkDefaults()
|
||||
|
@ -41,26 +41,15 @@ const (
|
||||
ProtocolUDP = "UDP"
|
||||
|
||||
FlannelNetworkPlugin = "flannel"
|
||||
FlannelImage = "flannel_image"
|
||||
FlannelCNIImage = "flannel_cni_image"
|
||||
FlannelIface = "flannel_iface"
|
||||
|
||||
CalicoNetworkPlugin = "calico"
|
||||
CalicoNodeImage = "calico_node_image"
|
||||
CalicoCNIImage = "calico_cni_image"
|
||||
CalicoControllersImage = "calico_controllers_image"
|
||||
CalicoctlImage = "calicoctl_image"
|
||||
CalicoCloudProvider = "calico_cloud_provider"
|
||||
CalicoNetworkPlugin = "calico"
|
||||
CalicoCloudProvider = "calico_cloud_provider"
|
||||
|
||||
CanalNetworkPlugin = "canal"
|
||||
CanalNodeImage = "canal_node_image"
|
||||
CanalCNIImage = "canal_cni_image"
|
||||
CanalFlannelImage = "canal_flannel_image"
|
||||
CanalIface = "canal_iface"
|
||||
|
||||
WeaveNetworkPlugin = "weave"
|
||||
WeaveImage = "weave_node_image"
|
||||
WeaveCNIImage = "weave_cni_image"
|
||||
|
||||
// List of map keys to be used with network templates
|
||||
|
||||
@ -140,7 +129,7 @@ func (c *Cluster) doFlannelDeploy(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
|
||||
}
|
||||
|
||||
func (c *Cluster) doCalicoDeploy(ctx context.Context) error {
|
||||
@ -158,7 +147,7 @@ func (c *Cluster) doCalicoDeploy(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
|
||||
}
|
||||
|
||||
func (c *Cluster) doCanalDeploy(ctx context.Context) error {
|
||||
@ -180,7 +169,7 @@ func (c *Cluster) doCanalDeploy(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
|
||||
}
|
||||
|
||||
func (c *Cluster) doWeaveDeploy(ctx context.Context) error {
|
||||
@ -194,7 +183,7 @@ func (c *Cluster) doWeaveDeploy(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName)
|
||||
return c.doAddonDeploy(ctx, pluginYaml, NetworkPluginResourceName, true)
|
||||
}
|
||||
|
||||
func (c *Cluster) getNetworkPluginManifest(pluginConfig map[string]string) (string, error) {
|
||||
|
@ -12,7 +12,7 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport WrapTransport) error {
|
||||
func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport WrapTransport, timeout int) error {
|
||||
job := v1.Job{}
|
||||
if err := decodeYamlResource(&job, jobYaml); err != nil {
|
||||
return err
|
||||
@ -32,7 +32,7 @@ func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport WrapTran
|
||||
return err
|
||||
}
|
||||
logrus.Debugf("[k8s] waiting for job %s to complete..", job.Name)
|
||||
return retryTo(ensureJobCompleted, k8sClient, job, DefaultRetries, DefaultSleepSeconds)
|
||||
return retryToWithTimeout(ensureJobCompleted, k8sClient, job, timeout)
|
||||
}
|
||||
|
||||
func ensureJobCompleted(k8sClient *kubernetes.Clientset, j interface{}) error {
|
||||
|
15
k8s/k8s.go
15
k8s/k8s.go
@ -13,6 +13,7 @@ import (
|
||||
const (
|
||||
DefaultRetries = 5
|
||||
DefaultSleepSeconds = 5
|
||||
DefaultTimeout = 30
|
||||
K8sWrapTransportTimeout = 30
|
||||
)
|
||||
|
||||
@ -42,6 +43,20 @@ func decodeYamlResource(resource interface{}, yamlManifest string) error {
|
||||
return decoder.Decode(&resource)
|
||||
}
|
||||
|
||||
func retryToWithTimeout(runFunc k8sCall, k8sClient *kubernetes.Clientset, resource interface{}, timeout int) error {
|
||||
var err error
|
||||
timePassed := 0
|
||||
for timePassed < timeout {
|
||||
if err = runFunc(k8sClient, resource); err != nil {
|
||||
time.Sleep(time.Second * time.Duration(DefaultSleepSeconds))
|
||||
timePassed += DefaultSleepSeconds
|
||||
continue
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func retryTo(runFunc k8sCall, k8sClient *kubernetes.Clientset, resource interface{}, retries, sleepSeconds int) error {
|
||||
var err error
|
||||
if retries == 0 {
|
||||
|
Loading…
Reference in New Issue
Block a user