1
0
mirror of https://github.com/rancher/rke.git synced 2025-04-27 11:21:08 +00:00
rke/cluster/validation.go

652 lines
22 KiB
Go

package cluster
import (
"context"
"errors"
"fmt"
"strings"
"github.com/blang/semver"
"github.com/rancher/rke/log"
"github.com/rancher/rke/metadata"
"github.com/rancher/rke/pki"
"github.com/rancher/rke/services"
"github.com/rancher/rke/util"
"github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/validation"
)
func (c *Cluster) ValidateCluster(ctx context.Context) error {
// validate kubernetes version
// Version Check
if err := validateVersion(ctx, c); err != nil {
return err
}
// validate duplicate nodes
if err := validateDuplicateNodes(c); err != nil {
return err
}
// validate hosts options
if err := validateHostsOptions(c); err != nil {
return err
}
// validate Auth options
if err := validateAuthOptions(c); err != nil {
return err
}
// validate Network options
if err := validateNetworkOptions(c); err != nil {
return err
}
// validate Ingress options
if err := validateIngressOptions(c); err != nil {
return err
}
// validate enabling CRIDockerd
if err := validateCRIDockerdOption(c); err != nil {
return err
}
// validate services options
return validateServicesOptions(c)
}
func validateAuthOptions(c *Cluster) error {
for strategy, enabled := range c.AuthnStrategies {
if !enabled {
continue
}
strategy = strings.ToLower(strategy)
if strategy != AuthnX509Provider && strategy != AuthnWebhookProvider {
return fmt.Errorf("Authentication strategy [%s] is not supported", strategy)
}
}
if !c.AuthnStrategies[AuthnX509Provider] {
return fmt.Errorf("Authentication strategy must contain [%s]", AuthnX509Provider)
}
return nil
}
func transformAciNetworkOption(option string) (string, string) {
var description string
switch option {
case AciSystemIdentifier:
option = "system_id"
description = "unique suffix for all cluster related objects in aci"
case AciServiceGraphSubnet:
option = "node_svc_subnet"
description = "Subnet to use for service graph endpoints on aci"
case AciStaticExternalSubnet:
option = "extern_static"
description = "Subnet to use for static external IPs on aci"
case AciDynamicExternalSubnet:
option = "extern_dynamic"
description = "Subnet to use for dynamic external IPs on aci"
case AciToken:
description = "UUID for this version of the input configuration"
case AciApicUserName:
description = "User name for aci apic"
case AciApicUserKey:
description = "Base64 encoded private key for aci apic user"
case AciApicUserCrt:
description = "Base64 encoded certificate for aci apic user"
case AciEncapType:
description = "One of the supported encap types for aci(vlan/vxlan)"
case AciMcastRangeStart:
description = "Mcast range start address for endpoint groups on aci"
case AciMcastRangeEnd:
description = "Mcast range end address for endpoint groups on aci"
case AciNodeSubnet:
description = "Kubernetes node address subnet"
case AciAEP:
description = "Attachment entity profile name on aci"
case AciVRFName:
description = "VRF Name on aci"
case AciVRFTenant:
description = "Tenant for VRF on aci"
case AciL3Out:
description = "L3Out on aci"
case AciKubeAPIVlan:
description = "Vlan for node network on aci"
case AciServiceVlan:
description = "Vlan for service graph nodes on aci"
case AciInfraVlan:
description = "Vlan for infra network on aci"
}
return option, description
}
func validateAciCloudOptionsDisabled(option string, value string) (string, string, bool) {
var description string
ok := false
switch option {
case AciUseOpflexServerVolume:
if value == DefaultAciUseOpflexServerVolume {
ok = true
}
description = "Use mounted volume for opflex server"
case AciUseHostNetnsVolume:
if value == DefaultAciUseHostNetnsVolume {
ok = true
}
description = "Mount host netns for opflex server"
case AciCApic:
if value == DefaultAciCApic {
ok = true
}
description = "Provision cloud apic"
case AciUseAciAnywhereCRD:
if value == DefaultAciUseAciAnywhereCRD {
ok = true
}
description = "Use Aci anywhere CRD"
case AciRunGbpContainer:
if value == DefaultAciRunGbpContainer {
ok = true
}
description = "Run Gbp Server"
case AciRunOpflexServerContainer:
if value == DefaultAciRunOpflexServerContainer {
ok = true
}
description = "Run Opflex Server"
case AciEpRegistry:
if value == "" {
ok = true
}
description = "Registry for Ep whether CRD or MODB"
case AciOpflexMode:
if value == "" {
ok = true
}
description = "Opflex overlay mode or on-prem"
case AciSubnetDomainName:
if value == "" {
ok = true
}
description = "Subnet domain name"
case AciKafkaClientCrt:
if value == "" {
ok = true
}
description = "CApic Kafka client certificate"
case AciKafkaClientKey:
if value == "" {
ok = true
}
description = "CApic Kafka client key"
case AciOverlayVRFName:
if value == "" {
ok = true
}
description = "Overlay VRF name"
case AciGbpPodSubnet:
if value == "" {
ok = true
}
description = "Gbp pod subnet"
case AciOpflexServerPort:
if value == "" {
ok = true
}
description = "Opflex server port"
}
return option, description, ok
}
func validateNetworkOptions(c *Cluster) error {
if c.Network.Plugin != NoNetworkPlugin && c.Network.Plugin != FlannelNetworkPlugin && c.Network.Plugin != CalicoNetworkPlugin && c.Network.Plugin != CanalNetworkPlugin && c.Network.Plugin != WeaveNetworkPlugin && c.Network.Plugin != AciNetworkPlugin {
return fmt.Errorf("Network plugin [%s] is not supported", c.Network.Plugin)
}
if c.Network.Plugin == FlannelNetworkPlugin && c.Network.MTU != 0 {
return fmt.Errorf("Network plugin [%s] does not support configuring MTU", FlannelNetworkPlugin)
}
dualStack := false
serviceClusterRanges := strings.Split(c.Services.KubeAPI.ServiceClusterIPRange, ",")
if len(serviceClusterRanges) > 1 {
logrus.Debugf("Found more than 1 service cluster IP range, assuming dual stack")
dualStack = true
}
clusterCIDRs := strings.Split(c.Services.KubeController.ClusterCIDR, ",")
if len(clusterCIDRs) > 1 {
logrus.Debugf("Found more than 1 cluster CIDR, assuming dual stack")
dualStack = true
}
if dualStack {
IPv6CompatibleNetworkPluginFound := false
for _, networkPlugin := range IPv6CompatibleNetworkPlugins {
if c.Network.Plugin == networkPlugin {
logrus.Debugf("Found IPv6 compatible network plugin [%s] == [%s]", c.Network.Plugin, networkPlugin)
IPv6CompatibleNetworkPluginFound = true
break
}
}
if !IPv6CompatibleNetworkPluginFound {
return fmt.Errorf("Network plugin [%s] does not support IPv6 (dualstack)", c.Network.Plugin)
}
}
if c.Network.Plugin == AciNetworkPlugin {
//Skip cloud options and throw an error.
cloudOptionsList := []string{AciEpRegistry, AciOpflexMode, AciUseHostNetnsVolume, AciUseOpflexServerVolume,
AciSubnetDomainName, AciKafkaClientCrt, AciKafkaClientKey, AciCApic, UseAciAnywhereCRD,
AciOverlayVRFName, AciGbpPodSubnet, AciRunGbpContainer, AciRunOpflexServerContainer, AciOpflexServerPort}
for _, v := range cloudOptionsList {
val, ok := c.Network.Options[v]
_, _, disabled := validateAciCloudOptionsDisabled(v, val)
if ok && !disabled {
return fmt.Errorf("Network plugin aci: %s = %s is provided,but cloud options are not allowed in this release", v, val)
}
}
networkOptionsList := []string{AciSystemIdentifier, AciToken, AciApicUserName, AciApicUserKey,
AciApicUserCrt, AciEncapType, AciMcastRangeStart, AciMcastRangeEnd,
AciNodeSubnet, AciAEP, AciVRFName, AciVRFTenant, AciL3Out, AciDynamicExternalSubnet,
AciStaticExternalSubnet, AciServiceGraphSubnet, AciKubeAPIVlan, AciServiceVlan, AciInfraVlan,
AciNodeSubnet}
for _, v := range networkOptionsList {
val, ok := c.Network.Options[v]
if !ok || val == "" {
var description string
v, description = transformAciNetworkOption(v)
return fmt.Errorf("Network plugin aci: %s(%s) under aci_network_provider is not provided", strings.TrimPrefix(v, "aci_"), description)
}
}
if c.Network.AciNetworkProvider != nil {
if c.Network.AciNetworkProvider.ApicHosts == nil {
return fmt.Errorf("Network plugin aci: %s(address of aci apic hosts) under aci_network_provider is not provided", "apic_hosts")
}
if c.Network.AciNetworkProvider.L3OutExternalNetworks == nil {
return fmt.Errorf("Network plugin aci: %s(external network name/s on aci) under aci_network_provider is not provided", "l3out_external_networks")
}
} else {
var requiredArgs []string
for _, v := range networkOptionsList {
v, _ = transformAciNetworkOption(v)
requiredArgs = append(requiredArgs, fmt.Sprintf(" %s", strings.TrimPrefix("aci_", v)))
}
requiredArgs = append(requiredArgs, fmt.Sprintf(" %s", ApicHosts))
requiredArgs = append(requiredArgs, fmt.Sprintf(" %s", L3OutExternalNetworks))
return fmt.Errorf("Network plugin aci: multiple parameters under aci_network_provider are not provided: %s", requiredArgs)
}
}
return nil
}
func validateHostsOptions(c *Cluster) error {
for i, host := range c.Nodes {
if len(host.Address) == 0 {
return fmt.Errorf("Address for host (%d) is not provided", i+1)
}
if len(host.User) == 0 {
return fmt.Errorf("User for host (%d) is not provided", i+1)
}
if len(host.Role) == 0 {
return fmt.Errorf("Role for host (%d) is not provided", i+1)
}
if errs := validation.IsDNS1123Subdomain(host.HostnameOverride); len(errs) > 0 {
return fmt.Errorf("Hostname_override [%s] for host (%d) is not valid: %v", host.HostnameOverride, i+1, errs)
}
for _, role := range host.Role {
if role != services.ETCDRole && role != services.ControlRole && role != services.WorkerRole {
return fmt.Errorf("Role [%s] for host (%d) is not recognized", role, i+1)
}
}
}
return nil
}
func validateServicesOptions(c *Cluster) error {
servicesOptions := map[string]string{
"etcd_image": c.Services.Etcd.Image,
"kube_api_image": c.Services.KubeAPI.Image,
"kube_api_service_cluster_ip_range": c.Services.KubeAPI.ServiceClusterIPRange,
"kube_controller_image": c.Services.KubeController.Image,
"kube_controller_service_cluster_ip_range": c.Services.KubeController.ServiceClusterIPRange,
"kube_controller_cluster_cidr": c.Services.KubeController.ClusterCIDR,
"scheduler_image": c.Services.Scheduler.Image,
"kubelet_image": c.Services.Kubelet.Image,
"kubelet_cluster_dns_service": c.Services.Kubelet.ClusterDNSServer,
"kubelet_cluster_domain": c.Services.Kubelet.ClusterDomain,
"kubelet_infra_container_image": c.Services.Kubelet.InfraContainerImage,
"kubeproxy_image": c.Services.Kubeproxy.Image,
}
for optionName, OptionValue := range servicesOptions {
if len(OptionValue) == 0 {
return fmt.Errorf("%s can't be empty", strings.Join(strings.Split(optionName, "_"), " "))
}
}
// Validate external etcd information
if len(c.Services.Etcd.ExternalURLs) > 0 {
if len(c.Services.Etcd.CACert) == 0 {
return errors.New("External CA Certificate for etcd can't be empty")
}
if len(c.Services.Etcd.Cert) == 0 {
return errors.New("External Client Certificate for etcd can't be empty")
}
if len(c.Services.Etcd.Key) == 0 {
return errors.New("External Client Key for etcd can't be empty")
}
if len(c.Services.Etcd.Path) == 0 {
return errors.New("External etcd path can't be empty")
}
}
// validate etcd s3 backup backend configurations
return validateEtcdBackupOptions(c)
}
func validateEtcdBackupOptions(c *Cluster) error {
if c.Services.Etcd.BackupConfig != nil {
if c.Services.Etcd.BackupConfig.S3BackupConfig != nil {
if len(c.Services.Etcd.BackupConfig.S3BackupConfig.Endpoint) == 0 {
return errors.New("etcd s3 backup backend endpoint can't be empty")
}
if len(c.Services.Etcd.BackupConfig.S3BackupConfig.BucketName) == 0 {
return errors.New("etcd s3 backup backend bucketName can't be empty")
}
if len(c.Services.Etcd.BackupConfig.S3BackupConfig.CustomCA) != 0 {
if isValid, err := pki.IsValidCertStr(c.Services.Etcd.BackupConfig.S3BackupConfig.CustomCA); !isValid {
return fmt.Errorf("invalid S3 endpoint CA certificate: %v", err)
}
}
}
}
return nil
}
func validateIngressOptions(c *Cluster) error {
// Should be changed when adding more ingress types
if c.Ingress.Provider != DefaultIngressController && c.Ingress.Provider != "none" {
return fmt.Errorf("Ingress controller %s is incorrect", c.Ingress.Provider)
}
if c.Ingress.DNSPolicy != "" &&
!(c.Ingress.DNSPolicy == string(v1.DNSClusterFirst) ||
c.Ingress.DNSPolicy == string(v1.DNSClusterFirstWithHostNet) ||
c.Ingress.DNSPolicy == string(v1.DNSNone) ||
c.Ingress.DNSPolicy == string(v1.DNSDefault)) {
return fmt.Errorf("DNSPolicy %s was not a valid DNS Policy", c.Ingress.DNSPolicy)
}
if c.Ingress.NetworkMode == "hostPort" {
if !(c.Ingress.HTTPPort >= 0 && c.Ingress.HTTPPort <= 65535) {
return fmt.Errorf("https port is invalid. Needs to be within 0 to 65535")
}
if !(c.Ingress.HTTPPort >= 0 && c.Ingress.HTTPPort <= 65535) {
return fmt.Errorf("http port is invalid. Needs to be within 0 to 65535")
}
if c.Ingress.HTTPPort != 0 && c.Ingress.HTTPSPort != 0 &&
(c.Ingress.HTTPPort == c.Ingress.HTTPSPort) {
return fmt.Errorf("http and https ports need to be different")
}
}
if !(c.Ingress.NetworkMode == "" ||
c.Ingress.NetworkMode == "hostNetwork" || c.Ingress.NetworkMode == "hostPort" ||
c.Ingress.NetworkMode == "none") {
return fmt.Errorf("NetworkMode %s was not a valid network mode", c.Ingress.NetworkMode)
}
return nil
}
func ValidateHostCount(c *Cluster) error {
if len(c.EtcdHosts) == 0 && len(c.Services.Etcd.ExternalURLs) == 0 {
failedEtcdHosts := []string{}
for _, host := range c.InactiveHosts {
if host.IsEtcd {
failedEtcdHosts = append(failedEtcdHosts, host.Address)
}
return fmt.Errorf("Cluster must have at least one etcd plane host: failed to connect to the following etcd host(s) %v", failedEtcdHosts)
}
return errors.New("Cluster must have at least one etcd plane host: please specify one or more etcd in cluster config")
}
if len(c.EtcdHosts) > 0 && len(c.Services.Etcd.ExternalURLs) > 0 {
return errors.New("Cluster can't have both internal and external etcd")
}
return nil
}
func validateDuplicateNodes(c *Cluster) error {
addresses := make(map[string]struct{}, len(c.Nodes))
hostnames := make(map[string]struct{}, len(c.Nodes))
for i := range c.Nodes {
if _, ok := addresses[c.Nodes[i].Address]; ok {
return fmt.Errorf("Cluster can't have duplicate node: %s", c.Nodes[i].Address)
}
addresses[c.Nodes[i].Address] = struct{}{}
if _, ok := hostnames[c.Nodes[i].HostnameOverride]; ok {
return fmt.Errorf("Cluster can't have duplicate node: %s", c.Nodes[i].HostnameOverride)
}
hostnames[c.Nodes[i].HostnameOverride] = struct{}{}
}
return nil
}
func validateVersion(ctx context.Context, c *Cluster) error {
_, err := util.StrToSemVer(c.Version)
if err != nil {
return fmt.Errorf("%s is not valid semver", c.Version)
}
_, ok := metadata.K8sVersionToRKESystemImages[c.Version]
if !ok {
if err := validateSystemImages(c); err != nil {
return fmt.Errorf("%s is an unsupported Kubernetes version and system images are not populated: %v", c.Version, err)
}
return nil
}
if _, ok := metadata.K8sBadVersions[c.Version]; ok {
log.Warnf(ctx, "%s version exists but its recommended to install this version - see 'rke config --system-images --all' for versions supported with this release", c.Version)
return fmt.Errorf("%s is an unsupported Kubernetes version and system images are not populated: %v", c.Version, err)
}
return nil
}
func validateSystemImages(c *Cluster) error {
if err := validateKubernetesImages(c); err != nil {
return err
}
if err := validateNetworkImages(c); err != nil {
return err
}
if err := validateDNSImages(c); err != nil {
return err
}
if err := validateMetricsImages(c); err != nil {
return err
}
return validateIngressImages(c)
}
func validateKubernetesImages(c *Cluster) error {
if len(c.SystemImages.Etcd) == 0 {
return errors.New("etcd image is not populated")
}
if len(c.SystemImages.Kubernetes) == 0 {
return errors.New("kubernetes image is not populated")
}
if len(c.SystemImages.PodInfraContainer) == 0 {
return errors.New("pod infrastructure container image is not populated")
}
if len(c.SystemImages.Alpine) == 0 {
return errors.New("alpine image is not populated")
}
if len(c.SystemImages.NginxProxy) == 0 {
return errors.New("nginx proxy image is not populated")
}
if len(c.SystemImages.CertDownloader) == 0 {
return errors.New("certificate downloader image is not populated")
}
if len(c.SystemImages.KubernetesServicesSidecar) == 0 {
return errors.New("kubernetes sidecar image is not populated")
}
return nil
}
func validateNetworkImages(c *Cluster) error {
// check network provider images
if c.Network.Plugin == FlannelNetworkPlugin {
if len(c.SystemImages.Flannel) == 0 {
return errors.New("flannel image is not populated")
}
if len(c.SystemImages.FlannelCNI) == 0 {
return errors.New("flannel cni image is not populated")
}
} else if c.Network.Plugin == CanalNetworkPlugin {
if len(c.SystemImages.CanalNode) == 0 {
return errors.New("canal image is not populated")
}
if len(c.SystemImages.CanalCNI) == 0 {
return errors.New("canal cni image is not populated")
}
if len(c.SystemImages.CanalFlannel) == 0 {
return errors.New("flannel image is not populated")
}
} else if c.Network.Plugin == CalicoNetworkPlugin {
if len(c.SystemImages.CalicoCNI) == 0 {
return errors.New("calico cni image is not populated")
}
if len(c.SystemImages.CalicoCtl) == 0 {
return errors.New("calico ctl image is not populated")
}
if len(c.SystemImages.CalicoNode) == 0 {
return errors.New("calico image is not populated")
}
if len(c.SystemImages.CalicoControllers) == 0 {
return errors.New("calico controllers image is not populated")
}
} else if c.Network.Plugin == WeaveNetworkPlugin {
if len(c.SystemImages.WeaveCNI) == 0 {
return errors.New("weave cni image is not populated")
}
if len(c.SystemImages.WeaveNode) == 0 {
return errors.New("weave image is not populated")
}
} else if c.Network.Plugin == AciNetworkPlugin {
if len(c.SystemImages.AciCniDeployContainer) == 0 {
return errors.New("aci cnideploy image is not populated")
}
if len(c.SystemImages.AciHostContainer) == 0 {
return errors.New("aci host container image is not populated")
}
if len(c.SystemImages.AciOpflexContainer) == 0 {
return errors.New("aci opflex agent image is not populated")
}
if len(c.SystemImages.AciMcastContainer) == 0 {
return errors.New("aci mcast container image is not populated")
}
if len(c.SystemImages.AciOpenvSwitchContainer) == 0 {
return errors.New("aci openvswitch image is not populated")
}
if len(c.SystemImages.AciControllerContainer) == 0 {
return errors.New("aci controller image is not populated")
}
//Skipping Cloud image validation.
//c.SystemImages.AciOpflexServerContainer
//c.SystemImages.AciGbpServerContainer
}
return nil
}
func validateDNSImages(c *Cluster) error {
// check dns provider images
if c.DNS.Provider == "kube-dns" {
if len(c.SystemImages.KubeDNS) == 0 {
return errors.New("kubedns image is not populated")
}
if len(c.SystemImages.DNSmasq) == 0 {
return errors.New("dnsmasq image is not populated")
}
if len(c.SystemImages.KubeDNSSidecar) == 0 {
return errors.New("kubedns sidecar image is not populated")
}
if len(c.SystemImages.KubeDNSAutoscaler) == 0 {
return errors.New("kubedns autoscaler image is not populated")
}
} else if c.DNS.Provider == "coredns" {
if len(c.SystemImages.CoreDNS) == 0 {
return errors.New("coredns image is not populated")
}
if len(c.SystemImages.CoreDNSAutoscaler) == 0 {
return errors.New("coredns autoscaler image is not populated")
}
}
if c.DNS.Nodelocal != nil && len(c.SystemImages.Nodelocal) == 0 {
return errors.New("nodelocal image is not populated")
}
return nil
}
func validateMetricsImages(c *Cluster) error {
// checl metrics server image
if c.Monitoring.Provider != "none" {
if len(c.SystemImages.MetricsServer) == 0 {
return errors.New("metrics server images is not populated")
}
}
return nil
}
func validateIngressImages(c *Cluster) error {
// check ingress images
if c.Ingress.Provider != "none" {
if len(c.SystemImages.Ingress) == 0 {
return errors.New("ingress image is not populated")
}
if len(c.SystemImages.IngressBackend) == 0 {
return errors.New("ingress backend image is not populated")
}
// Ingress Webhook image is used starting with k8s 1.21
k8sVersion := c.RancherKubernetesEngineConfig.Version
toMatch, err := semver.Make(k8sVersion[1:])
if err != nil {
return fmt.Errorf("%s is not valid semver", k8sVersion)
}
logrus.Debugf("Checking if ingress webhook image is required for cluster version [%s]", k8sVersion)
IngressWebhookImageRequiredRange, err := semver.ParseRange(">=1.21.0-rancher0")
if err != nil {
logrus.Warnf("Failed to parse semver range for checking required ingress webhook image")
}
if IngressWebhookImageRequiredRange(toMatch) {
logrus.Debugf("Cluster version [%s] uses ingress webhook image, checking if image is populated", k8sVersion)
if len(c.SystemImages.IngressWebhook) == 0 {
return errors.New("ingress webhook image is not populated")
}
}
}
return nil
}
func validateCRIDockerdOption(c *Cluster) error {
if c.EnableCRIDockerd != nil && *c.EnableCRIDockerd {
k8sVersion := c.RancherKubernetesEngineConfig.Version
toMatch, err := semver.Make(k8sVersion[1:])
if err != nil {
return fmt.Errorf("%s is not valid semver", k8sVersion)
}
logrus.Debugf("Checking cri-dockerd for cluster version [%s]", k8sVersion)
// cri-dockerd can be enabled for k8s 1.21 and up
CRIDockerdAllowedRange, err := semver.ParseRange(">=1.21.0-rancher0")
if err != nil {
logrus.Warnf("Failed to parse semver range for checking cri-dockerd")
}
if !CRIDockerdAllowedRange(toMatch) {
logrus.Debugf("Cluster version [%s] is not allowed to enable cri-dockerd", k8sVersion)
return fmt.Errorf("Enabling cri-dockerd for cluster version [%s] is not supported", k8sVersion)
}
logrus.Infof("cri-dockerd is enabled for cluster version [%s]", k8sVersion)
}
return nil
}