diff --git a/cluster/certificates.go b/cluster/certificates.go index 843b50ea..7ed3ceed 100644 --- a/cluster/certificates.go +++ b/cluster/certificates.go @@ -203,3 +203,18 @@ func RotateRKECertificates(ctx context.Context, c *Cluster, flags ExternalFlags, clusterState.DesiredState.RancherKubernetesEngineConfig = &c.RancherKubernetesEngineConfig return nil } + +func GetClusterCertsFromNodes(ctx context.Context, kubeCluster *Cluster) (map[string]pki.CertificatePKI, error) { + log.Infof(ctx, "[certificates] Fetching kubernetes certificates from nodes") + var err error + backupHosts := hosts.GetUniqueHostList(kubeCluster.EtcdHosts, kubeCluster.ControlPlaneHosts, nil) + certificates := map[string]pki.CertificatePKI{} + for _, host := range backupHosts { + certificates, err = pki.FetchCertificatesFromHost(ctx, kubeCluster.EtcdHosts, host, kubeCluster.SystemImages.Alpine, kubeCluster.LocalKubeConfigPath, kubeCluster.PrivateRegistriesMap) + if certificates != nil { + return certificates, nil + } + } + // reporting the last error only. + return nil, err +} diff --git a/cluster/state.go b/cluster/state.go index a42c0db0..bd584842 100644 --- a/cluster/state.go +++ b/cluster/state.go @@ -6,6 +6,7 @@ import ( "fmt" "io/ioutil" "os" + "path" "path/filepath" "strings" "time" @@ -264,3 +265,29 @@ func removeStateFile(ctx context.Context, statePath string) { } log.Infof(ctx, "State file removed successfully") } + +func GetStateFromNodes(ctx context.Context, kubeCluster *Cluster) *Cluster { + log.Infof(ctx, "[state] Fetching cluster state from Nodes") + var currentCluster Cluster + var clusterFile string + var err error + + uniqueHosts := hosts.GetUniqueHostList(kubeCluster.EtcdHosts, kubeCluster.ControlPlaneHosts, kubeCluster.WorkerHosts) + for _, host := range uniqueHosts { + filePath := path.Join(host.PrefixPath, pki.TempCertPath, pki.ClusterStateFile) + clusterFile, err = pki.FetchFileFromHost(ctx, filePath, kubeCluster.SystemImages.Alpine, host, kubeCluster.PrivateRegistriesMap, pki.StateDeployerContainerName, "state") + if err == nil { + break + } + } + if len(clusterFile) == 0 { + return nil + } + err = yaml.Unmarshal([]byte(clusterFile), ¤tCluster) + if err != nil { + logrus.Debugf("[state] Failed to unmarshal the cluster file fetched from nodes: %v", err) + return nil + } + log.Infof(ctx, "[state] Successfully fetched cluster state from Nodes") + return ¤tCluster +} diff --git a/cmd/common.go b/cmd/common.go index 68acc175..6929a965 100644 --- a/cmd/common.go +++ b/cmd/common.go @@ -82,7 +82,7 @@ func ClusterInit(ctx context.Context, rkeConfig *v3.RancherKubernetesEngineConfi return err } - err = doUpgradeLegacyCluster(ctx, kubeCluster, rkeFullState) + err = doUpgradeLegacyCluster(ctx, kubeCluster, rkeFullState, flags) if err != nil { log.Warnf(ctx, "[state] can't fetch legacy cluster state from Kubernetes") } @@ -128,7 +128,7 @@ func setS3OptionsFromCLI(c *cli.Context) *v3.S3BackupConfig { return s3BackupBackend } -func doUpgradeLegacyCluster(ctx context.Context, kubeCluster *cluster.Cluster, fullState *cluster.FullState) error { +func doUpgradeLegacyCluster(ctx context.Context, kubeCluster *cluster.Cluster, fullState *cluster.FullState, flags cluster.ExternalFlags) error { if _, err := os.Stat(kubeCluster.LocalKubeConfigPath); os.IsNotExist(err) { // there is no kubeconfig. This is a new cluster logrus.Debug("[state] local kubeconfig not found, this is a new cluster") @@ -147,13 +147,24 @@ func doUpgradeLegacyCluster(ctx context.Context, kubeCluster *cluster.Cluster, f } recoveredCluster, err := cluster.GetStateFromKubernetes(ctx, kubeCluster) if err != nil { - return err + log.Warnf(ctx, "Failed to fetch state from kubernetes: %v", err) + // try to fetch state from nodes + err = kubeCluster.TunnelHosts(ctx, flags) + if err != nil { + return err + } + recoveredCluster = cluster.GetStateFromNodes(ctx, kubeCluster) } // if we found a recovered cluster, we will need override the current state if recoveredCluster != nil { recoveredCerts, err := cluster.GetClusterCertsFromKubernetes(ctx, kubeCluster) if err != nil { - return err + log.Warnf(ctx, "Failed to fetch certs from kubernetes: %v", err) + // try to fetch certs from nodes + recoveredCerts, err = cluster.GetClusterCertsFromNodes(ctx, kubeCluster) + if err != nil { + return err + } } fullState.CurrentState.RancherKubernetesEngineConfig = recoveredCluster.RancherKubernetesEngineConfig.DeepCopy() fullState.CurrentState.CertificatesBundle = recoveredCerts diff --git a/cmd/etcd.go b/cmd/etcd.go index 5edfed10..79063dda 100644 --- a/cmd/etcd.go +++ b/cmd/etcd.go @@ -117,7 +117,7 @@ func RestoreEtcdSnapshot( } stateFilePath := cluster.GetStateFilePath(flags.ClusterFilePath, flags.ConfigDir) rkeFullState, _ := cluster.ReadStateFile(ctx, stateFilePath) - if err := doUpgradeLegacyCluster(ctx, kubeCluster, rkeFullState); err != nil { + if err := doUpgradeLegacyCluster(ctx, kubeCluster, rkeFullState, flags); err != nil { return APIURL, caCrt, clientCert, clientKey, nil, err } diff --git a/pki/deploy.go b/pki/deploy.go index 2f2d8c0e..16772fec 100644 --- a/pki/deploy.go +++ b/pki/deploy.go @@ -2,10 +2,12 @@ package pki import ( "context" + "crypto/rsa" "fmt" "io/ioutil" "os" "path" + "strings" "time" "github.com/docker/docker/api/types" @@ -15,6 +17,7 @@ import ( "github.com/rancher/rke/log" v3 "github.com/rancher/types/apis/management.cattle.io/v3" "github.com/sirupsen/logrus" + "k8s.io/client-go/util/cert" ) const ( @@ -147,3 +150,98 @@ func DeployCertificatesOnHost(ctx context.Context, host *hosts.Host, crtMap map[ } return doRunDeployer(ctx, host, env, certDownloaderImage, prsMap) } + +func FetchCertificatesFromHost(ctx context.Context, extraHosts []*hosts.Host, host *hosts.Host, image, localConfigPath string, prsMap map[string]v3.PrivateRegistry) (map[string]CertificatePKI, error) { + // rebuilding the certificates. This should look better after refactoring pki + tmpCerts := make(map[string]CertificatePKI) + + crtList := map[string]bool{ + CACertName: false, + KubeAPICertName: false, + KubeControllerCertName: true, + KubeSchedulerCertName: true, + KubeProxyCertName: true, + KubeNodeCertName: true, + KubeAdminCertName: false, + RequestHeaderCACertName: false, + APIProxyClientCertName: false, + } + + for _, etcdHost := range extraHosts { + // Fetch etcd certificates + crtList[GetEtcdCrtName(etcdHost.InternalAddress)] = false + } + + for certName, config := range crtList { + certificate := CertificatePKI{} + crt, err := FetchFileFromHost(ctx, GetCertTempPath(certName), image, host, prsMap, CertFetcherContainer, "certificates") + // I will only exit with an error if it's not a not-found-error and this is not an etcd certificate + if err != nil && !strings.HasPrefix(certName, "kube-etcd") { + // IsErrNotFound doesn't catch this because it's a custom error + if isFileNotFoundErr(err) { + return nil, nil + } + return nil, err + + } + // If I can't find an etcd I will not fail and will create it later. + if crt == "" && strings.HasPrefix(certName, "kube-etcd") { + tmpCerts[certName] = CertificatePKI{} + continue + } + key, err := FetchFileFromHost(ctx, GetKeyTempPath(certName), image, host, prsMap, CertFetcherContainer, "certificate") + + if config { + config, err := FetchFileFromHost(ctx, GetConfigTempPath(certName), image, host, prsMap, CertFetcherContainer, "certificate") + if err != nil { + return nil, err + } + certificate.Config = config + } + parsedCert, err := cert.ParseCertsPEM([]byte(crt)) + if err != nil { + return nil, err + } + parsedKey, err := cert.ParsePrivateKeyPEM([]byte(key)) + if err != nil { + return nil, err + } + certificate.Certificate = parsedCert[0] + certificate.Key = parsedKey.(*rsa.PrivateKey) + tmpCerts[certName] = certificate + logrus.Debugf("[certificates] Recovered certificate: %s", certName) + } + + if err := docker.RemoveContainer(ctx, host.DClient, host.Address, CertFetcherContainer); err != nil { + return nil, err + } + return populateCertMap(tmpCerts, localConfigPath, extraHosts), nil + +} + +func FetchFileFromHost(ctx context.Context, filePath, image string, host *hosts.Host, prsMap map[string]v3.PrivateRegistry, containerName, state string) (string, error) { + imageCfg := &container.Config{ + Image: image, + } + hostCfg := &container.HostConfig{ + Binds: []string{ + fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(host.PrefixPath, "/etc/kubernetes")), + }, + Privileged: true, + } + isRunning, err := docker.IsContainerRunning(ctx, host.DClient, host.Address, containerName, true) + if err != nil { + return "", err + } + if !isRunning { + if err := docker.DoRunContainer(ctx, host.DClient, imageCfg, hostCfg, containerName, host.Address, state, prsMap); err != nil { + return "", err + } + } + file, err := docker.ReadFileFromContainer(ctx, host.DClient, host.Address, containerName, filePath) + if err != nil { + return "", err + } + + return file, nil +} diff --git a/pki/util.go b/pki/util.go index 74f420bb..00ea5934 100644 --- a/pki/util.go +++ b/pki/util.go @@ -395,7 +395,10 @@ func populateCertMap(tmpCerts map[string]CertificatePKI, localConfigPath string, etcdCrt, etcdKey := tmpCerts[etcdName].Certificate, tmpCerts[etcdName].Key certs[etcdName] = ToCertObject(etcdName, "", "", etcdCrt, etcdKey, nil) } - + // Request header ca + certs[RequestHeaderCACertName] = ToCertObject(RequestHeaderCACertName, "", "", tmpCerts[RequestHeaderCACertName].Certificate, tmpCerts[RequestHeaderCACertName].Key, nil) + // Api proxy client + certs[APIProxyClientCertName] = ToCertObject(APIProxyClientCertName, "", "", tmpCerts[APIProxyClientCertName].Certificate, tmpCerts[APIProxyClientCertName].Key, nil) return certs }