diff --git a/cluster/certificates.go b/cluster/certificates.go index 5b3c0d77..18260c5b 100644 --- a/cluster/certificates.go +++ b/cluster/certificates.go @@ -32,6 +32,26 @@ func SetUpAuthentication(kubeCluster, currentCluster *Cluster) error { return nil } +func regenerateAPICertificate(c *Cluster, certificates map[string]pki.CertificatePKI) (map[string]pki.CertificatePKI, error) { + logrus.Debugf("[certificates] Regenerating kubeAPI certificate") + kubeAPIAltNames := pki.GetAltNames(c.ControlPlaneHosts, c.ClusterDomain, c.KubernetesServiceIP) + caCrt := certificates[pki.CACertName].Certificate + caKey := certificates[pki.CACertName].Key + kubeAPICert, kubeAPIKey, err := pki.GenerateKubeAPICertAndKey(caCrt, caKey, kubeAPIAltNames) + if err != nil { + return nil, err + } + certificates[pki.KubeAPICertName] = pki.CertificatePKI{ + Certificate: kubeAPICert, + Key: kubeAPIKey, + Config: certificates[pki.KubeAPICertName].Config, + EnvName: certificates[pki.KubeAPICertName].EnvName, + ConfigEnvName: certificates[pki.KubeAPICertName].ConfigEnvName, + KeyEnvName: certificates[pki.KubeAPICertName].KeyEnvName, + } + return certificates, nil +} + func getClusterCerts(kubeClient *kubernetes.Clientset) (map[string]pki.CertificatePKI, error) { logrus.Infof("[certificates] Getting Cluster certificates from Kubernetes") certificatesNames := []string{ diff --git a/cluster/cluster.go b/cluster/cluster.go index 281d7a58..5a53713c 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -68,6 +68,12 @@ func ParseConfig(clusterFile string) (*Cluster, error) { if err != nil { return nil, fmt.Errorf("Failed to classify hosts from config file: %v", err) } + + err = c.ValidateCluster() + if err != nil { + return nil, fmt.Errorf("Failed to validate cluster: %v", err) + } + c.KubernetesServiceIP, err = services.GetKubernetesServiceIP(c.Services.KubeAPI.ServiceClusterIPRange) if err != nil { return nil, fmt.Errorf("Failed to get Kubernetes Service IP: %v", err) @@ -135,13 +141,17 @@ func ReconcileCluster(kubeCluster, currentCluster *Cluster) error { logrus.Infof("[reconcile] Check Control plane hosts to be deleted") cpToDelete := hosts.GetToDeleteHosts(currentCluster.ControlPlaneHosts, kubeCluster.ControlPlaneHosts) for _, toDeleteHost := range cpToDelete { - hosts.DeleteNode(&toDeleteHost, kubeClient) + if err := hosts.DeleteNode(&toDeleteHost, kubeClient); err != nil { + return fmt.Errorf("Failed to delete controlplane node %s from cluster", toDeleteHost.AdvertisedHostname) + } } logrus.Infof("[reconcile] Check worker hosts to be deleted") wpToDelete := hosts.GetToDeleteHosts(currentCluster.WorkerHosts, kubeCluster.WorkerHosts) for _, toDeleteHost := range wpToDelete { - hosts.DeleteNode(&toDeleteHost, kubeClient) + if err := hosts.DeleteNode(&toDeleteHost, kubeClient); err != nil { + return fmt.Errorf("Failed to delete worker node %s from cluster", toDeleteHost.AdvertisedHostname) + } } // Rolling update on change for nginx Proxy diff --git a/cluster/state.go b/cluster/state.go index b74f628b..06decc6e 100644 --- a/cluster/state.go +++ b/cluster/state.go @@ -2,6 +2,7 @@ package cluster import ( "fmt" + "os" "time" "github.com/rancher/rke/k8s" @@ -32,21 +33,32 @@ func (c *Cluster) SaveClusterState(clusterFile string) error { func (c *Cluster) GetClusterState() (*Cluster, error) { var err error var currentCluster *Cluster - c.KubeClient, err = k8s.NewClient(c.LocalKubeConfigPath) - if err != nil { - logrus.Warnf("Failed to initiate new Kubernetes Client: %v", err) - } else { - // Handle pervious kubernetes state and certificate generation + + // check if local kubeconfig file exists + if _, err = os.Stat(c.LocalKubeConfigPath); !os.IsNotExist(err) { + logrus.Infof("[state] Found local kube config file, trying to get state from cluster") + + // initiate kubernetes client + c.KubeClient, err = k8s.NewClient(c.LocalKubeConfigPath) + if err != nil { + logrus.Warnf("Failed to initiate new Kubernetes Client: %v", err) + return nil, nil + } + // Get pervious kubernetes state currentCluster = getStateFromKubernetes(c.KubeClient, c.LocalKubeConfigPath) + // Get previous kubernetes certificates if currentCluster != nil { currentCluster.Certificates, err = getClusterCerts(c.KubeClient) if err != nil { return nil, fmt.Errorf("Failed to Get Kubernetes certificates: %v", err) } - err = currentCluster.InvertIndexHosts() - if err != nil { + if err := currentCluster.InvertIndexHosts(); err != nil { return nil, fmt.Errorf("Failed to classify hosts from fetched cluster: %v", err) } + currentCluster.Certificates, err = regenerateAPICertificate(c, currentCluster.Certificates) + if err != nil { + return nil, fmt.Errorf("Failed to regenerate KubeAPI certificate %v", err) + } } } return currentCluster, nil @@ -102,7 +114,7 @@ func getStateFromKubernetes(kubeClient *kubernetes.Clientset, kubeConfigPath str } return ¤tCluster case <-time.After(time.Second * GetStateTimeout): - logrus.Warnf("Timed out waiting for kubernetes cluster") + logrus.Infof("Timed out waiting for kubernetes cluster to get state") return nil } } diff --git a/cluster/validation.go b/cluster/validation.go new file mode 100644 index 00000000..b49f511c --- /dev/null +++ b/cluster/validation.go @@ -0,0 +1,46 @@ +package cluster + +import ( + "fmt" + "strings" +) + +func (c *Cluster) ValidateCluster() error { + // make sure cluster has at least one controlplane/etcd host + if len(c.ControlPlaneHosts) == 0 { + return fmt.Errorf("Cluster must have at least one control plane host") + } + if len(c.EtcdHosts) == 0 { + return fmt.Errorf("Cluster must have at least one etcd host") + } + + // validate services options + err := validateServicesOption(c) + if err != nil { + return err + } + return nil +} + +func validateServicesOption(c *Cluster) error { + servicesOptions := map[string]string{ + "etcd_image": c.Services.Etcd.Image, + "kube_api_image": c.Services.KubeAPI.Image, + "kube_api_service_cluster_ip_range": c.Services.KubeAPI.ServiceClusterIPRange, + "kube_controller_image": c.Services.KubeController.Image, + "kube_controller_service_cluster_ip_range": c.Services.KubeController.ServiceClusterIPRange, + "kube_controller_cluster_cidr": c.Services.KubeController.ClusterCIDR, + "scheduler_image": c.Services.Scheduler.Image, + "kubelet_image": c.Services.Kubelet.Image, + "kubelet_cluster_dns_service": c.Services.Kubelet.ClusterDNSServer, + "kubelet_cluster_domain": c.Services.Kubelet.ClusterDomain, + "kubelet_infra_container_image": c.Services.Kubelet.InfraContainerImage, + "kubeproxy_image": c.Services.Kubeproxy.Image, + } + for optionName, OptionValue := range servicesOptions { + if len(OptionValue) == 0 { + return fmt.Errorf("%s can't be empty", strings.Join(strings.Split(optionName, "_"), " ")) + } + } + return nil +} diff --git a/hosts/hosts.go b/hosts/hosts.go index ac7f2cb3..6cf4fbda 100644 --- a/hosts/hosts.go +++ b/hosts/hosts.go @@ -56,7 +56,7 @@ func DeleteNode(toDeleteHost *Host, kubeClient *kubernetes.Clientset) error { logrus.Infof("[hosts] Cordoning host [%s]", toDeleteHost.AdvertisedHostname) err := k8s.CordonUncordon(kubeClient, toDeleteHost.AdvertisedHostname, true) if err != nil { - return nil + return err } logrus.Infof("[hosts] Deleting host [%s] from the cluster", toDeleteHost.AdvertisedHostname) err = k8s.DeleteNode(kubeClient, toDeleteHost.AdvertisedHostname) diff --git a/k8s/node.go b/k8s/node.go index 42205029..1de6d41e 100644 --- a/k8s/node.go +++ b/k8s/node.go @@ -2,6 +2,7 @@ package k8s import ( "fmt" + "time" "github.com/sirupsen/logrus" "k8s.io/api/core/v1" @@ -23,6 +24,7 @@ func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cordoned b node, err := k8sClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) if err != nil { logrus.Debugf("Error getting node %s: %v", nodeName, err) + time.Sleep(time.Second * 5) continue } if node.Spec.Unschedulable == cordoned { @@ -33,6 +35,7 @@ func CordonUncordon(k8sClient *kubernetes.Clientset, nodeName string, cordoned b _, err = k8sClient.CoreV1().Nodes().Update(node) if err != nil { logrus.Debugf("Error setting cordoned state for node %s: %v", nodeName, err) + time.Sleep(time.Second * 5) continue } updated = true diff --git a/pki/pki.go b/pki/pki.go index c092e289..4fb20a24 100644 --- a/pki/pki.go +++ b/pki/pki.go @@ -57,8 +57,8 @@ func generateCerts(cpHosts []hosts.Host, clusterDomain, localConfigPath string, // generate API certificate and key logrus.Infof("[certificates] Generating Kubernetes API server certificates") - kubeAPIAltNames := getAltNames(cpHosts, clusterDomain, KubernetesServiceIP) - kubeAPICrt, kubeAPIKey, err := generateKubeAPICertAndKey(caCrt, caKey, kubeAPIAltNames) + kubeAPIAltNames := GetAltNames(cpHosts, clusterDomain, KubernetesServiceIP) + kubeAPICrt, kubeAPIKey, err := GenerateKubeAPICertAndKey(caCrt, caKey, kubeAPIAltNames) if err != nil { return nil, err } @@ -198,7 +198,7 @@ func generateClientCertAndKey(caCrt *x509.Certificate, caKey *rsa.PrivateKey, co return clientCert, rootKey, nil } -func generateKubeAPICertAndKey(caCrt *x509.Certificate, caKey *rsa.PrivateKey, altNames *cert.AltNames) (*x509.Certificate, *rsa.PrivateKey, error) { +func GenerateKubeAPICertAndKey(caCrt *x509.Certificate, caKey *rsa.PrivateKey, altNames *cert.AltNames) (*x509.Certificate, *rsa.PrivateKey, error) { rootKey, err := cert.NewPrivateKey() if err != nil { return nil, nil, fmt.Errorf("Failed to generate private key for kube-apiserver certificate: %v", err) @@ -232,7 +232,7 @@ func generateCACertAndKey() (*x509.Certificate, *rsa.PrivateKey, error) { return kubeCACert, rootKey, nil } -func getAltNames(cpHosts []hosts.Host, clusterDomain string, KubernetesServiceIP net.IP) *cert.AltNames { +func GetAltNames(cpHosts []hosts.Host, clusterDomain string, KubernetesServiceIP net.IP) *cert.AltNames { ips := []net.IP{} dnsNames := []string{} for _, host := range cpHosts {