diff --git a/cluster/cluster.go b/cluster/cluster.go index 066f0b36..0e2ffa40 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -790,15 +790,18 @@ func rebuildLocalAdminConfig(ctx context.Context, kubeCluster *Cluster) error { if len(kubeCluster.ControlPlaneHosts) == 0 { return nil } + var activeControlPlaneHostFound bool log.Infof(ctx, "[reconcile] Rebuilding and updating local kube config") var workingConfig, newConfig string currentKubeConfig := kubeCluster.Certificates[pki.KubeAdminCertName] caCrt := kubeCluster.Certificates[pki.CACertName].Certificate for _, cpHost := range kubeCluster.ControlPlaneHosts { if (currentKubeConfig == pki.CertificatePKI{}) { + log.Debugf(ctx, "[reconcile] Rebuilding and updating local kube config, creating new address") kubeCluster.Certificates = make(map[string]pki.CertificatePKI) newConfig = getLocalAdminConfigWithNewAddress(kubeCluster.LocalKubeConfigPath, cpHost.Address, kubeCluster.ClusterName) } else { + log.Debugf(ctx, "[reconcile] Rebuilding and updating local kube config, creating new kubeconfig") kubeURL := fmt.Sprintf("https://%s:6443", cpHost.Address) caData := string(cert.EncodeCertPEM(caCrt)) crtData := string(cert.EncodeCertPEM(currentKubeConfig.Certificate)) @@ -810,9 +813,14 @@ func rebuildLocalAdminConfig(ctx context.Context, kubeCluster *Cluster) error { } workingConfig = newConfig if _, err := GetK8sVersion(kubeCluster.LocalKubeConfigPath, kubeCluster.K8sWrapTransport); err == nil { - log.Infof(ctx, "[reconcile] host [%s] is active master on the cluster", cpHost.Address) + log.Infof(ctx, "[reconcile] host [%s] is a control plane node with reachable Kubernetes API endpoint in the cluster", cpHost.Address) + activeControlPlaneHostFound = true break } + log.Warnf(ctx, "[reconcile] host [%s] is a control plane node without reachable Kubernetes API endpoint in the cluster", cpHost.Address) + } + if !activeControlPlaneHostFound { + log.Warnf(ctx, "[reconcile] no control plane node with reachable Kubernetes API endpoint in the cluster found") } currentKubeConfig.Config = workingConfig kubeCluster.Certificates[pki.KubeAdminCertName] = currentKubeConfig diff --git a/cluster/state.go b/cluster/state.go index a6ecc86e..4fb45aa6 100644 --- a/cluster/state.go +++ b/cluster/state.go @@ -17,6 +17,7 @@ import ( "github.com/rancher/rke/k8s" "github.com/rancher/rke/log" "github.com/rancher/rke/pki" + "github.com/rancher/rke/services" v3 "github.com/rancher/rke/types" "github.com/sirupsen/logrus" "gopkg.in/yaml.v2" @@ -68,6 +69,19 @@ func (c *Cluster) GetClusterState(ctx context.Context, fullState *FullState) (*C return currentCluster, nil } +func (c *Cluster) GetStateFileFromConfigMap(ctx context.Context) (string, error) { + kubeletImage := c.Services.Kubelet.Image + for _, host := range c.ControlPlaneHosts { + stateFile, err := services.RunGetStateFileFromConfigMap(ctx, host, c.PrivateRegistriesMap, kubeletImage) + if err != nil || stateFile == "" { + logrus.Infof("Could not get ConfigMap with cluster state from host [%s]", host.Address) + continue + } + return stateFile, nil + } + return "", fmt.Errorf("Unable to get ConfigMap with cluster state from any Control Plane host") +} + func SaveFullStateToKubernetes(ctx context.Context, kubeCluster *Cluster, fullState *FullState) error { k8sClient, err := k8s.NewClient(kubeCluster.LocalKubeConfigPath, kubeCluster.K8sWrapTransport) if err != nil { diff --git a/cmd/util.go b/cmd/util.go new file mode 100644 index 00000000..ead5cf7e --- /dev/null +++ b/cmd/util.go @@ -0,0 +1,207 @@ +package cmd + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/rancher/rke/cluster" + "github.com/rancher/rke/hosts" + "github.com/rancher/rke/k8s" + "github.com/rancher/rke/pki" + v3 "github.com/rancher/rke/types" + "github.com/rancher/rke/util" + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +func UtilCommand() cli.Command { + utilCfgFlags := []cli.Flag{ + cli.StringFlag{ + Name: "config", + Usage: "Specify an alternate cluster YAML file", + Value: pki.ClusterConfig, + EnvVar: "RKE_CONFIG", + }, + } + utilFlags := append(utilCfgFlags, commonFlags...) + + return cli.Command{ + Name: "util", + Usage: "Various utilities to retrieve cluster related files and troubleshoot", + Subcommands: cli.Commands{ + cli.Command{ + Name: "get-state-file", + Usage: "Retrieve state file from cluster", + Action: getStateFile, + Flags: utilFlags, + }, + cli.Command{ + Name: "get-kubeconfig", + Usage: "Retrieve kubeconfig file from cluster state", + Action: getKubeconfigFile, + Flags: utilFlags, + }, + }, + } +} + +func getKubeconfigFile(ctx *cli.Context) error { + logrus.Infof("Creating new kubeconfig file") + // Check if we can successfully connect to the cluster using the existing kubeconfig file + clusterFile, clusterFilePath, err := resolveClusterFile(ctx) + if err != nil { + return fmt.Errorf("failed to resolve cluster file: %v", err) + } + + // setting up the flags + flags := cluster.GetExternalFlags(false, false, false, false, "", clusterFilePath) + rkeConfig, err := cluster.ParseConfig(clusterFile) + if err != nil { + return fmt.Errorf("failed to parse cluster file: %v", err) + } + + rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig) + if err != nil { + return err + } + + clusterState, err := cluster.ReadStateFile(context.Background(), cluster.GetStateFilePath(flags.ClusterFilePath, flags.ConfigDir)) + if err != nil { + return err + } + + // Creating temp cluster to check if snapshot archive contains state file and retrieve it + tempCluster, err := cluster.InitClusterObject(context.Background(), rkeConfig, flags, "") + if err != nil { + return err + } + + // Move current kubeconfig file + err = util.CopyFileWithPrefix(tempCluster.LocalKubeConfigPath, "kube_config") + if err != nil { + return err + } + kubeCluster, _ := tempCluster.GetClusterState(context.Background(), clusterState) + + if err := cluster.RebuildKubeconfig(context.Background(), kubeCluster); err != nil { + return err + } + + return nil +} + +func getStateFile(ctx *cli.Context) error { + logrus.Infof("Retrieving state file from cluster") + // Check if we can successfully connect to the cluster using the existing kubeconfig file + localKubeConfig := pki.GetLocalKubeConfig(ctx.String("config"), "") + clusterFile, clusterFilePath, err := resolveClusterFile(ctx) + if err != nil { + return fmt.Errorf("failed to resolve cluster file: %v", err) + } + // setting up the flags + flags := cluster.GetExternalFlags(false, false, false, false, "", clusterFilePath) + + // not going to use a k8s dialer here.. this is a CLI command + serverVersion, err := cluster.GetK8sVersion(localKubeConfig, nil) + if err != nil { + logrus.Infof("Unable to connect to server using kubeconfig, trying to get state from Control Plane node(s), error: %v", err) + // We need to retrieve the state file using Docker on the node(s) + + rkeConfig, err := cluster.ParseConfig(clusterFile) + if err != nil { + return fmt.Errorf("failed to parse cluster file: %v", err) + } + + rkeConfig, err = setOptionsFromCLI(ctx, rkeConfig) + if err != nil { + return err + } + + _, _, _, _, _, err = RetrieveClusterStateConfigMap(context.Background(), rkeConfig, hosts.DialersOptions{}, flags, map[string]interface{}{}) + if err != nil { + return err + } + + return nil + } + logrus.Infof("Successfully connected to server using kubeconfig, retrieved server version [%s]", serverVersion) + // Retrieve full-cluster-state configmap + k8sClient, err := k8s.NewClient(localKubeConfig, nil) + cfgMap, err := k8s.GetConfigMap(k8sClient, cluster.FullStateConfigMapName) + clusterData := cfgMap.Data[cluster.FullStateConfigMapName] + rkeFullState := &cluster.FullState{} + if err = json.Unmarshal([]byte(clusterData), rkeFullState); err != nil { + return err + } + + // Move current state file + stateFilePath := cluster.GetStateFilePath(flags.ClusterFilePath, flags.ConfigDir) + err = util.ReplaceFileWithBackup(stateFilePath, "rkestate") + if err != nil { + return err + } + + // Write new state file + err = rkeFullState.WriteStateFile(context.Background(), stateFilePath) + if err != nil { + return err + } + + return nil +} + +func RetrieveClusterStateConfigMap( + ctx context.Context, + rkeConfig *v3.RancherKubernetesEngineConfig, + dialersOptions hosts.DialersOptions, + flags cluster.ExternalFlags, + data map[string]interface{}) (string, string, string, string, map[string]pki.CertificatePKI, error) { + var APIURL, caCrt, clientCert, clientKey string + + rkeFullState := &cluster.FullState{} + + // Creating temp cluster to check if snapshot archive contains state file and retrieve it + tempCluster, err := cluster.InitClusterObject(ctx, rkeConfig, flags, "") + if err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + if err := tempCluster.SetupDialers(ctx, dialersOptions); err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + if err := tempCluster.TunnelHosts(ctx, flags); err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + // Get ConfigMap containing cluster state from Control Plane Hosts + stateFile, err := tempCluster.GetStateFileFromConfigMap(ctx) + + if err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + rkeFullState, err = cluster.StringToFullState(ctx, stateFile) + + // Move current state file + stateFilePath := cluster.GetStateFilePath(flags.ClusterFilePath, flags.ConfigDir) + err = util.ReplaceFileWithBackup(stateFilePath, "rkestate") + if err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + + err = rkeFullState.WriteStateFile(context.Background(), stateFilePath) + if err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + + // Move current kubeconfig file + err = util.CopyFileWithPrefix(tempCluster.LocalKubeConfigPath, "kube_config") + if err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, err + } + kubeCluster, _ := tempCluster.GetClusterState(ctx, rkeFullState) + + if err := cluster.RebuildKubeconfig(ctx, kubeCluster); err != nil { + return APIURL, caCrt, clientCert, clientKey, nil, nil + } + + return APIURL, caCrt, clientCert, clientKey, nil, nil +} diff --git a/main.go b/main.go index 39044271..2814d9bf 100644 --- a/main.go +++ b/main.go @@ -62,6 +62,7 @@ func mainErr() error { cmd.EtcdCommand(), cmd.CertificateCommand(), cmd.EncryptionCommand(), + cmd.UtilCommand(), } app.Flags = []cli.Flag{ cli.BoolFlag{ diff --git a/pki/deploy.go b/pki/deploy.go index d4098c79..be58821a 100644 --- a/pki/deploy.go +++ b/pki/deploy.go @@ -183,6 +183,7 @@ func doRunDeployer(ctx context.Context, host *hosts.Host, containerEnv []string, func DeployAdminConfig(ctx context.Context, kubeConfig, localConfigPath string) error { if len(kubeConfig) == 0 { + logrus.Infof("kubeConfig is empty") return nil } logrus.Debugf("Deploying admin Kubeconfig locally at [%s]", localConfigPath) diff --git a/services/controlplane.go b/services/controlplane.go index 2318a0cc..5ab15338 100644 --- a/services/controlplane.go +++ b/services/controlplane.go @@ -3,9 +3,11 @@ package services import ( "context" "fmt" + "path" "strings" "sync" + "github.com/docker/docker/api/types/container" "github.com/docker/docker/client" "github.com/rancher/rke/docker" "github.com/rancher/rke/hosts" @@ -366,3 +368,37 @@ func isControlPlaneHostUpgradable(ctx context.Context, host *hosts.Host, process logrus.Debugf("[%s] Host %v is not upgradable", ControlRole, host.HostnameOverride) return false, nil } + +func RunGetStateFileFromConfigMap(ctx context.Context, controlPlaneHost *hosts.Host, prsMap map[string]v3.PrivateRegistry, dockerImage string) (string, error) { + imageCfg := &container.Config{ + Entrypoint: []string{"bash"}, + Cmd: []string{ + "-c", + "kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml -n kube-system get configmap full-cluster-state -o json | jq -r .data.\\\"full-cluster-state\\\" | jq -r . > /tmp/configmap.cluster.rkestate", + }, + Image: dockerImage, + } + hostCfg := &container.HostConfig{ + Binds: []string{ + fmt.Sprintf("%s:/etc/kubernetes:z", path.Join(controlPlaneHost.PrefixPath, "/etc/kubernetes")), + }, + NetworkMode: container.NetworkMode("host"), + RestartPolicy: container.RestartPolicy{Name: "no"}, + } + + if err := docker.DoRemoveContainer(ctx, controlPlaneHost.DClient, ControlPlaneConfigMapStateFileContainerName, controlPlaneHost.Address); err != nil { + return "", err + } + if err := docker.DoRunOnetimeContainer(ctx, controlPlaneHost.DClient, imageCfg, hostCfg, ControlPlaneConfigMapStateFileContainerName, controlPlaneHost.Address, ControlRole, prsMap); err != nil { + return "", err + } + statefile, err := docker.ReadFileFromContainer(ctx, controlPlaneHost.DClient, controlPlaneHost.Address, ControlPlaneConfigMapStateFileContainerName, "/tmp/configmap.cluster.rkestate") + if err != nil { + return "", err + } + if err := docker.DoRemoveContainer(ctx, controlPlaneHost.DClient, ControlPlaneConfigMapStateFileContainerName, controlPlaneHost.Address); err != nil { + return "", err + } + + return statefile, nil +} diff --git a/services/services.go b/services/services.go index 03ac3e2f..7ccf0c70 100644 --- a/services/services.go +++ b/services/services.go @@ -22,24 +22,25 @@ const ( SidekickServiceName = "sidekick" RBACAuthorizationMode = "rbac" - KubeAPIContainerName = "kube-apiserver" - KubeletContainerName = "kubelet" - KubeproxyContainerName = "kube-proxy" - KubeControllerContainerName = "kube-controller-manager" - SchedulerContainerName = "kube-scheduler" - EtcdContainerName = "etcd" - EtcdSnapshotContainerName = "etcd-rolling-snapshots" - EtcdSnapshotOnceContainerName = "etcd-snapshot-once" - EtcdSnapshotRemoveContainerName = "etcd-remove-snapshot" - EtcdRestoreContainerName = "etcd-restore" - EtcdDownloadBackupContainerName = "etcd-download-backup" - EtcdServeBackupContainerName = "etcd-Serve-backup" - EtcdChecksumContainerName = "etcd-checksum-checker" - EtcdStateFileContainerName = "etcd-extract-statefile" - NginxProxyContainerName = "nginx-proxy" - SidekickContainerName = "service-sidekick" - LogLinkContainerName = "rke-log-linker" - LogCleanerContainerName = "rke-log-cleaner" + KubeAPIContainerName = "kube-apiserver" + KubeletContainerName = "kubelet" + KubeproxyContainerName = "kube-proxy" + KubeControllerContainerName = "kube-controller-manager" + SchedulerContainerName = "kube-scheduler" + EtcdContainerName = "etcd" + EtcdSnapshotContainerName = "etcd-rolling-snapshots" + EtcdSnapshotOnceContainerName = "etcd-snapshot-once" + EtcdSnapshotRemoveContainerName = "etcd-remove-snapshot" + EtcdRestoreContainerName = "etcd-restore" + EtcdDownloadBackupContainerName = "etcd-download-backup" + EtcdServeBackupContainerName = "etcd-Serve-backup" + EtcdChecksumContainerName = "etcd-checksum-checker" + EtcdStateFileContainerName = "etcd-extract-statefile" + ControlPlaneConfigMapStateFileContainerName = "extract-statefile-configmap" + NginxProxyContainerName = "nginx-proxy" + SidekickContainerName = "service-sidekick" + LogLinkContainerName = "rke-log-linker" + LogCleanerContainerName = "rke-log-cleaner" KubeAPIPort = 6443 SchedulerPort = 10251 diff --git a/util/util.go b/util/util.go index eedc7583..d3b4d432 100644 --- a/util/util.go +++ b/util/util.go @@ -2,6 +2,8 @@ package util import ( "fmt" + "io" + "io/ioutil" "net/url" "os" "reflect" @@ -180,3 +182,65 @@ func CleanWindowsPath(s string) string { } return strings.Join(new, "\\") } + +func ReplaceFileWithBackup(originalFile, prefixBackupFile string) error { + fileExists, err := IsFileExists(originalFile) + if err != nil { + return err + } + if !fileExists { + return nil + } + tmpfile, err := ioutil.TempFile(".", prefixBackupFile) + if err != nil { + return err + } + err = os.Rename(originalFile, tmpfile.Name()) + if err != nil { + return err + } + logrus.Infof("Moved file [%s] to new location [%s] as back-up", originalFile, tmpfile.Name()) + return nil +} + +func CopyFileWithPrefix(originalFile, prefixDestFile string) error { + fileExists, err := IsFileExists(originalFile) + if err != nil { + return err + } + if !fileExists { + return nil + } + + sourceFileStat, err := os.Stat(originalFile) + if err != nil { + return err + } + + if !sourceFileStat.Mode().IsRegular() { + return fmt.Errorf("%s is not a regular file", originalFile) + } + + source, err := os.Open(originalFile) + if err != nil { + return err + } + defer source.Close() + + destFile, err := ioutil.TempFile(".", prefixDestFile) + if err != nil { + return err + } + + destination, err := os.Create(destFile.Name()) + if err != nil { + return err + } + defer destination.Close() + _, err = io.Copy(destination, source) + if err != nil { + return err + } + logrus.Infof("Copied file [%s] to new location [%s] as back-up", originalFile, destFile.Name()) + return nil +}