diff --git a/cmd/kubeadm/app/cmd/upgrade/plan.go b/cmd/kubeadm/app/cmd/upgrade/plan.go index b2e20ec927b..e1a161400a9 100644 --- a/cmd/kubeadm/app/cmd/upgrade/plan.go +++ b/cmd/kubeadm/app/cmd/upgrade/plan.go @@ -89,14 +89,31 @@ func RunPlan(flags *planFlags) error { return err } - // Define Local Etcd cluster to be able to retrieve information - etcdClient, err := etcdutil.NewStaticPodClient( - []string{"localhost:2379"}, - constants.GetStaticPodDirectory(), - upgradeVars.cfg.CertificatesDir, - ) - if err != nil { - return err + var etcdClient etcdutil.ClusterInterrogator + + // Currently this is the only method we have for distinguishing + // external etcd vs static pod etcd + isExternalEtcd := len(upgradeVars.cfg.Etcd.Endpoints) > 0 + if isExternalEtcd { + client, err := etcdutil.New( + upgradeVars.cfg.Etcd.Endpoints, + upgradeVars.cfg.Etcd.CAFile, + upgradeVars.cfg.Etcd.CertFile, + upgradeVars.cfg.Etcd.KeyFile) + if err != nil { + return err + } + etcdClient = client + } else { + client, err := etcdutil.NewFromStaticPod( + []string{"localhost:2379"}, + constants.GetStaticPodDirectory(), + upgradeVars.cfg.CertificatesDir, + ) + if err != nil { + return err + } + etcdClient = client } // Compute which upgrade possibilities there are @@ -107,13 +124,13 @@ func RunPlan(flags *planFlags) error { } // Tell the user which upgrades are available - printAvailableUpgrades(availUpgrades, os.Stdout, upgradeVars.cfg.FeatureGates) + printAvailableUpgrades(availUpgrades, os.Stdout, upgradeVars.cfg.FeatureGates, isExternalEtcd) return nil } // printAvailableUpgrades prints a UX-friendly overview of what versions are available to upgrade to // TODO look into columnize or some other formatter when time permits instead of using the tabwriter -func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGates map[string]bool) { +func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGates map[string]bool, isExternalEtcd bool) { // Return quickly 
if no upgrades can be made if len(upgrades) == 0 { @@ -126,6 +143,16 @@ func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGate // Loop through the upgrade possibilities and output text to the command line for _, upgrade := range upgrades { + if isExternalEtcd && upgrade.CanUpgradeEtcd() { + fmt.Fprintln(w, "External components that should be upgraded manually before you upgrade the control plane with 'kubeadm upgrade apply':") + fmt.Fprintln(tabw, "COMPONENT\tCURRENT\tAVAILABLE") + fmt.Fprintf(tabw, "Etcd\t%s\t%s\n", upgrade.Before.EtcdVersion, upgrade.After.EtcdVersion) + + // We should flush the writer here at this stage; as the columns will now be of the right size, adjusted to the above content + tabw.Flush() + fmt.Fprintln(w, "") + } + if upgrade.CanUpgradeKubelets() { fmt.Fprintln(w, "Components that must be upgraded manually after you have upgraded the control plane with 'kubeadm upgrade apply':") fmt.Fprintln(tabw, "COMPONENT\tCURRENT\tAVAILABLE") @@ -160,7 +187,9 @@ func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGate } else { fmt.Fprintf(tabw, "Kube DNS\t%s\t%s\n", upgrade.Before.DNSVersion, upgrade.After.DNSVersion) } - fmt.Fprintf(tabw, "Etcd\t%s\t%s\n", upgrade.Before.EtcdVersion, upgrade.After.EtcdVersion) + if !isExternalEtcd { + fmt.Fprintf(tabw, "Etcd\t%s\t%s\n", upgrade.Before.EtcdVersion, upgrade.After.EtcdVersion) + } // The tabwriter should be flushed at this stage as we have now put in all the required content for this time. This is required for the tabs' size to be correct. 
tabw.Flush() diff --git a/cmd/kubeadm/app/cmd/upgrade/plan_test.go b/cmd/kubeadm/app/cmd/upgrade/plan_test.go index 370066310fb..f83c183f15a 100644 --- a/cmd/kubeadm/app/cmd/upgrade/plan_test.go +++ b/cmd/kubeadm/app/cmd/upgrade/plan_test.go @@ -62,16 +62,27 @@ func TestSortedSliceFromStringIntMap(t *testing.T) { func TestPrintAvailableUpgrades(t *testing.T) { featureGates := make(map[string]bool) var tests = []struct { + name string upgrades []upgrade.Upgrade buf *bytes.Buffer expectedBytes []byte + externalEtcd bool }{ { + name: "Up to date", upgrades: []upgrade.Upgrade{}, expectedBytes: []byte(`Awesome, you're up-to-date! Enjoy! `), }, { + name: "Up to date external etcd", + externalEtcd: true, + upgrades: []upgrade.Upgrade{}, + expectedBytes: []byte(`Awesome, you're up-to-date! Enjoy! +`), + }, + { + name: "Patch version available", upgrades: []upgrade.Upgrade{ { Description: "version in the v1.8 series", @@ -117,6 +128,7 @@ _____________________________________________________________________ `), }, { + name: "minor version available", upgrades: []upgrade.Upgrade{ { Description: "stable version", @@ -160,6 +172,7 @@ _____________________________________________________________________ `), }, { + name: "patch and minor version available", upgrades: []upgrade.Upgrade{ { Description: "version in the v1.8 series", @@ -243,6 +256,7 @@ _____________________________________________________________________ `), }, { + name: "experimental version available", upgrades: []upgrade.Upgrade{ { Description: "experimental version", @@ -288,6 +302,7 @@ _____________________________________________________________________ `), }, { + name: "release candidate available", upgrades: []upgrade.Upgrade{ { Description: "release candidate version", @@ -333,6 +348,7 @@ _____________________________________________________________________ `), }, { + name: "multiple kubelet versions", upgrades: []upgrade.Upgrade{ { Description: "version in the v1.9 series", @@ -377,19 +393,71 @@ Note: 
Before you can perform this upgrade, you have to update kubeadm to v1.9.3. _____________________________________________________________________ +`), + }, + { + name: "external etcd upgrade available", + upgrades: []upgrade.Upgrade{ + { + Description: "version in the v1.9 series", + Before: upgrade.ClusterState{ + KubeVersion: "v1.9.2", + KubeletVersions: map[string]uint16{ + "v1.9.2": 1, + }, + KubeadmVersion: "v1.9.2", + DNSVersion: "1.14.5", + EtcdVersion: "3.0.17", + }, + After: upgrade.ClusterState{ + KubeVersion: "v1.9.3", + KubeadmVersion: "v1.9.3", + DNSVersion: "1.14.8", + EtcdVersion: "3.1.12", + }, + }, + }, + externalEtcd: true, + expectedBytes: []byte(`External components that should be upgraded manually before you upgrade the control plane with 'kubeadm upgrade apply': +COMPONENT CURRENT AVAILABLE +Etcd 3.0.17 3.1.12 + +Components that must be upgraded manually after you have upgraded the control plane with 'kubeadm upgrade apply': +COMPONENT CURRENT AVAILABLE +Kubelet 1 x v1.9.2 v1.9.3 + +Upgrade to the latest version in the v1.9 series: + +COMPONENT CURRENT AVAILABLE +API Server v1.9.2 v1.9.3 +Controller Manager v1.9.2 v1.9.3 +Scheduler v1.9.2 v1.9.3 +Kube Proxy v1.9.2 v1.9.3 +Kube DNS 1.14.5 1.14.8 + +You can now apply the upgrade by executing the following command: + + kubeadm upgrade apply v1.9.3 + +Note: Before you can perform this upgrade, you have to update kubeadm to v1.9.3. 
+ +_____________________________________________________________________ + `), }, } for _, rt := range tests { - rt.buf = bytes.NewBufferString("") - printAvailableUpgrades(rt.upgrades, rt.buf, featureGates) - actualBytes := rt.buf.Bytes() - if !bytes.Equal(actualBytes, rt.expectedBytes) { - t.Errorf( - "failed PrintAvailableUpgrades:\n\texpected: %q\n\t actual: %q", - string(rt.expectedBytes), - string(actualBytes), - ) - } + t.Run(rt.name, func(t *testing.T) { + rt.buf = bytes.NewBufferString("") + printAvailableUpgrades(rt.upgrades, rt.buf, featureGates, rt.externalEtcd) + actualBytes := rt.buf.Bytes() + if !bytes.Equal(actualBytes, rt.expectedBytes) { + t.Errorf( + "failed PrintAvailableUpgrades:\n\texpected: %q\n\t actual: %q", + string(rt.expectedBytes), + string(actualBytes), + ) + } + }) } } diff --git a/cmd/kubeadm/app/phases/upgrade/compute.go b/cmd/kubeadm/app/phases/upgrade/compute.go index e36bb0acc17..dca2c9939f8 100644 --- a/cmd/kubeadm/app/phases/upgrade/compute.go +++ b/cmd/kubeadm/app/phases/upgrade/compute.go @@ -50,6 +50,11 @@ func (u *Upgrade) CanUpgradeKubelets() bool { return !sameVersionFound } +// CanUpgradeEtcd returns whether an upgrade of etcd is possible +func (u *Upgrade) CanUpgradeEtcd() bool { + return u.Before.EtcdVersion != u.After.EtcdVersion +} + // ActiveDNSAddon returns the version of CoreDNS or kube-dns func ActiveDNSAddon(featureGates map[string]bool) string { if features.Enabled(featureGates, features.CoreDNS) { @@ -74,7 +79,7 @@ type ClusterState struct { // GetAvailableUpgrades fetches all versions from the specified VersionGetter and computes which // kinds of upgrades can be performed -func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesAllowed, rcUpgradesAllowed bool, etcdClient etcdutil.Client, featureGates map[string]bool) ([]Upgrade, error) { +func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesAllowed, rcUpgradesAllowed bool, etcdClient 
etcdutil.ClusterInterrogator, featureGates map[string]bool) ([]Upgrade, error) { fmt.Println("[upgrade] Fetching available versions to upgrade to") // Collect the upgrades kubeadm can do in this list @@ -83,13 +88,13 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA // Get the cluster version clusterVersionStr, clusterVersion, err := versionGetterImpl.ClusterVersion() if err != nil { - return nil, err + return upgrades, err } // Get current kubeadm CLI version kubeadmVersionStr, kubeadmVersion, err := versionGetterImpl.KubeadmVersion() if err != nil { - return nil, err + return upgrades, err } // Get and output the current latest stable version @@ -103,13 +108,13 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA // Get the kubelet versions in the cluster kubeletVersions, err := versionGetterImpl.KubeletVersions() if err != nil { - return nil, err + return upgrades, err } // Get current etcd version - etcdStatus, err := etcdClient.GetStatus() + etcdVersion, err := etcdClient.GetVersion() if err != nil { - return nil, err + return upgrades, err } // Construct a descriptor for the current state of the world @@ -118,7 +123,7 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA DNSVersion: dns.GetDNSVersion(clusterVersion, ActiveDNSAddon(featureGates)), KubeadmVersion: kubeadmVersionStr, KubeletVersions: kubeletVersions, - EtcdVersion: etcdStatus.Version, + EtcdVersion: etcdVersion, } // Do a "dumb guess" that a new minor upgrade is available just because the latest stable version is higher than the cluster version @@ -201,7 +206,7 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA // Get and output the current latest unstable version latestVersionStr, latestVersion, err := versionGetterImpl.VersionFromCILabel("latest", "experimental version") if err != nil { - return nil, err + return upgrades, err } minorUnstable := 
latestVersion.Components()[1] @@ -209,7 +214,7 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA previousBranch := fmt.Sprintf("latest-1.%d", minorUnstable-1) previousBranchLatestVersionStr, previousBranchLatestVersion, err := versionGetterImpl.VersionFromCILabel(previousBranch, "") if err != nil { - return nil, err + return upgrades, err } // If that previous latest version is an RC, RCs are allowed and the cluster version is lower than the RC version, show the upgrade diff --git a/cmd/kubeadm/app/phases/upgrade/compute_test.go b/cmd/kubeadm/app/phases/upgrade/compute_test.go index 071284a56cd..c5341a75f2f 100644 --- a/cmd/kubeadm/app/phases/upgrade/compute_test.go +++ b/cmd/kubeadm/app/phases/upgrade/compute_test.go @@ -17,11 +17,13 @@ limitations under the License. package upgrade import ( + "fmt" "reflect" "testing" "time" "github.com/coreos/etcd/clientv3" + etcdutil "k8s.io/kubernetes/cmd/kubeadm/app/util/etcd" versionutil "k8s.io/kubernetes/pkg/util/version" ) @@ -62,28 +64,54 @@ func (f *fakeVersionGetter) KubeletVersions() (map[string]uint16, error) { }, nil } -type fakeEtcdCluster struct{ TLS bool } - -func (f fakeEtcdCluster) HasTLS() bool { return f.TLS } - -func (f fakeEtcdCluster) GetStatus() (*clientv3.StatusResponse, error) { - client := &clientv3.StatusResponse{} - client.Version = "3.1.12" - return client, nil +type fakeEtcdClient struct { + TLS bool + mismatchedVersions bool } -func (f fakeEtcdCluster) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) { - return f.GetStatus() +func (f fakeEtcdClient) HasTLS() bool { return f.TLS } + +func (f fakeEtcdClient) ClusterAvailable() (bool, error) { return true, nil } + +func (f fakeEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) { + return true, nil +} + +func (f fakeEtcdClient) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) { + 
return make(map[string]*clientv3.StatusResponse), nil +} + +func (f fakeEtcdClient) GetVersion() (string, error) { + versions, _ := f.GetClusterVersions() + if f.mismatchedVersions { + return "", fmt.Errorf("etcd cluster contains endpoints with mismatched versions: %v", versions) + } + return "3.1.12", nil +} + +func (f fakeEtcdClient) GetClusterVersions() (map[string]string, error) { + if f.mismatchedVersions { + return map[string]string{ + "foo": "3.1.12", + "bar": "3.2.0", + }, nil + } + return map[string]string{ + "foo": "3.1.12", + "bar": "3.1.12", + }, nil } func TestGetAvailableUpgrades(t *testing.T) { featureGates := make(map[string]bool) + etcdClient := fakeEtcdClient{} tests := []struct { name string vg VersionGetter expectedUpgrades []Upgrade allowExperimental, allowRCs bool errExpected bool + etcdClient etcdutil.ClusterInterrogator }{ { name: "no action needed, already up-to-date", @@ -98,6 +126,7 @@ func TestGetAvailableUpgrades(t *testing.T) { expectedUpgrades: []Upgrade{}, allowExperimental: false, errExpected: false, + etcdClient: etcdClient, }, { name: "simple patch version upgrade", @@ -131,6 +160,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: false, errExpected: false, + etcdClient: etcdClient, }, { name: "no version provided to offline version getter does not change behavior", @@ -164,6 +194,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: false, errExpected: false, + etcdClient: etcdClient, }, { name: "minor version upgrade only", @@ -197,6 +228,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: false, errExpected: false, + etcdClient: etcdClient, }, { name: "both minor version upgrade and patch version upgrade available", @@ -248,6 +280,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: false, errExpected: false, + etcdClient: etcdClient, }, { name: "allow experimental upgrades, but no upgrade available", @@ -263,6 +296,7 @@ func 
TestGetAvailableUpgrades(t *testing.T) { expectedUpgrades: []Upgrade{}, allowExperimental: true, errExpected: false, + etcdClient: etcdClient, }, { name: "upgrade to an unstable version should be supported", @@ -297,6 +331,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: true, errExpected: false, + etcdClient: etcdClient, }, { name: "upgrade from an unstable version to an unstable version should be supported", @@ -331,6 +366,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: true, errExpected: false, + etcdClient: etcdClient, }, { name: "v1.X.0-alpha.0 should be ignored", @@ -366,6 +402,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: true, errExpected: false, + etcdClient: etcdClient, }, { name: "upgrade to an RC version should be supported", @@ -401,6 +438,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowRCs: true, errExpected: false, + etcdClient: etcdClient, }, { name: "it is possible (but very uncommon) that the latest version from the previous branch is an rc and the current latest version is alpha.0. In that case, show the RC", @@ -436,6 +474,7 @@ func TestGetAvailableUpgrades(t *testing.T) { }, allowExperimental: true, errExpected: false, + etcdClient: etcdClient, }, { name: "upgrade to an RC version should be supported. 
There may also be an even newer unstable version.", @@ -490,6 +529,22 @@ func TestGetAvailableUpgrades(t *testing.T) { allowRCs: true, allowExperimental: true, errExpected: false, + etcdClient: etcdClient, + }, + { + name: "Upgrades with external etcd with mismatched versions should not be allowed.", + vg: &fakeVersionGetter{ + clusterVersion: "v1.9.3", + kubeletVersion: "v1.9.3", + kubeadmVersion: "v1.9.3", + stablePatchVersion: "v1.9.3", + stableVersion: "v1.9.3", + }, + allowRCs: false, + allowExperimental: false, + etcdClient: fakeEtcdClient{mismatchedVersions: true}, + expectedUpgrades: []Upgrade{}, + errExpected: true, }, { name: "offline version getter", @@ -498,6 +553,7 @@ func TestGetAvailableUpgrades(t *testing.T) { kubeletVersion: "v1.10.0", kubeadmVersion: "v1.10.1", }, "v1.11.1"), + etcdClient: etcdClient, expectedUpgrades: []Upgrade{ { Description: "version in the v1.1 series", @@ -523,16 +579,19 @@ func TestGetAvailableUpgrades(t *testing.T) { // Instantiating a fake etcd cluster for being able to get etcd version for a corresponding // kubernetes release. 
- testCluster := fakeEtcdCluster{} for _, rt := range tests { t.Run(rt.name, func(t *testing.T) { - actualUpgrades, actualErr := GetAvailableUpgrades(rt.vg, rt.allowExperimental, rt.allowRCs, testCluster, featureGates) + actualUpgrades, actualErr := GetAvailableUpgrades(rt.vg, rt.allowExperimental, rt.allowRCs, rt.etcdClient, featureGates) + fmt.Printf("actualErr: %v\n", actualErr) + fmt.Printf("actualErr != nil: %v\n", actualErr != nil) + fmt.Printf("errExpected: %v\n", rt.errExpected) + if (actualErr != nil) != rt.errExpected { + fmt.Printf("Hello error") + t.Errorf("failed TestGetAvailableUpgrades\n\texpected error: %t\n\tgot error: %t", rt.errExpected, (actualErr != nil)) + } if !reflect.DeepEqual(actualUpgrades, rt.expectedUpgrades) { t.Errorf("failed TestGetAvailableUpgrades\n\texpected upgrades: %v\n\tgot: %v", rt.expectedUpgrades, actualUpgrades) } - if (actualErr != nil) != rt.errExpected { - t.Errorf("failed TestGetAvailableUpgrades\n\texpected error: %t\n\tgot error: %t", rt.errExpected, (actualErr != nil)) - } }) } } diff --git a/cmd/kubeadm/app/phases/upgrade/staticpods.go b/cmd/kubeadm/app/phases/upgrade/staticpods.go index 255d1b4461e..488a850636f 100644 --- a/cmd/kubeadm/app/phases/upgrade/staticpods.go +++ b/cmd/kubeadm/app/phases/upgrade/staticpods.go @@ -224,14 +224,14 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP } // performEtcdStaticPodUpgrade performs upgrade of etcd, it returns bool which indicates fatal error or not and the actual error. 
-func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, recoverManifests map[string]string, isTLSUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.Client) (bool, error) { +func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, recoverManifests map[string]string, isTLSUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) (bool, error) { // Add etcd static pod spec only if external etcd is not configured if len(cfg.Etcd.Endpoints) != 0 { return false, fmt.Errorf("external etcd detected, won't try to change any etcd state") } // Checking health state of etcd before proceeding with the upgrade - etcdStatus, err := oldEtcdClient.GetStatus() + _, err := oldEtcdClient.GetClusterStatus() if err != nil { return true, fmt.Errorf("etcd cluster is not healthy: %v", err) } @@ -248,9 +248,13 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM if err != nil { return true, fmt.Errorf("failed to retrieve an etcd version for the target kubernetes version: %v", err) } - currentEtcdVersion, err := version.ParseSemantic(etcdStatus.Version) + currentEtcdVersionStr, err := oldEtcdClient.GetVersion() if err != nil { - return true, fmt.Errorf("failed to parse the current etcd version(%s): %v", etcdStatus.Version, err) + return true, fmt.Errorf("failed to retrieve the current etcd version: %v", err) + } + currentEtcdVersion, err := version.ParseSemantic(currentEtcdVersionStr) + if err != nil { + return true, fmt.Errorf("failed to parse the current etcd version(%s): %v", currentEtcdVersionStr, err) } // Comparing current etcd version with desired to catch the same version or downgrade condition and fail on them. 
@@ -292,7 +296,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM // Since upgrade component failed, the old etcd manifest has either been restored or was never touched // Now we need to check the health of etcd cluster if it is up with old manifest fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available") - if _, err := oldEtcdClient.WaitForStatus(noDelay, retries, retryInterval); err != nil { + if _, err := oldEtcdClient.WaitForClusterAvailable(noDelay, retries, retryInterval); err != nil { fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err) // At this point we know that etcd cluster is dead and it is safe to copy backup datastore and to rollback old etcd manifest @@ -305,7 +309,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM // Now that we've rolled back the data, let's check if the cluster comes up fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available") - if _, err := oldEtcdClient.WaitForStatus(noDelay, retries, retryInterval); err != nil { + if _, err := oldEtcdClient.WaitForClusterAvailable(noDelay, retries, retryInterval); err != nil { fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err) // Nothing else left to try to recover etcd cluster return true, fmt.Errorf("fatal error rolling back local etcd cluster manifest: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir) @@ -321,7 +325,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM // Initialize the new etcd client if it wasn't pre-initialized if newEtcdClient == nil { - client, err := etcdutil.NewStaticPodClient( + client, err := etcdutil.NewFromStaticPod( []string{"localhost:2379"}, constants.GetStaticPodDirectory(), cfg.CertificatesDir, @@ -334,7 +338,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM // Checking health state of etcd after the upgrade 
fmt.Println("[upgrade/etcd] Waiting for etcd to become available") - if _, err = newEtcdClient.WaitForStatus(podRestartDelay, retries, retryInterval); err != nil { + if _, err = newEtcdClient.WaitForClusterAvailable(podRestartDelay, retries, retryInterval); err != nil { fmt.Printf("[upgrade/etcd] Failed to healthcheck etcd: %v\n", err) // Despite the fact that upgradeComponent was successful, there is something wrong with the etcd cluster // First step is to restore back up of datastore @@ -352,7 +356,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM // Assuming rollback of the old etcd manifest was successful, check the status of etcd cluster again fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available") - if _, err := oldEtcdClient.WaitForStatus(noDelay, retries, retryInterval); err != nil { + if _, err := oldEtcdClient.WaitForClusterAvailable(noDelay, retries, retryInterval); err != nil { fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err) // Nothing else left to try to recover etcd cluster return true, fmt.Errorf("fatal error rolling back local etcd cluster manifest: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir) @@ -367,7 +371,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM } // StaticPodControlPlane upgrades a static pod-hosted control plane -func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, etcdUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.Client) error { +func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, etcdUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) error { recoverManifests := map[string]string{} var isTLSUpgrade bool var isExternalEtcd bool @@ -381,7 +385,7 @@ func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager if 
len(cfg.Etcd.Endpoints) > 0 { // External etcd isExternalEtcd = true - client, err := etcdutil.NewClient( + client, err := etcdutil.New( cfg.Etcd.Endpoints, cfg.Etcd.CAFile, cfg.Etcd.CertFile, @@ -397,7 +401,7 @@ func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager } } else { // etcd Static Pod - client, err := etcdutil.NewStaticPodClient( + client, err := etcdutil.NewFromStaticPod( []string{"localhost:2379"}, constants.GetStaticPodDirectory(), cfg.CertificatesDir, diff --git a/cmd/kubeadm/app/phases/upgrade/staticpods_test.go b/cmd/kubeadm/app/phases/upgrade/staticpods_test.go index dc34d0ca973..c18ff42ad36 100644 --- a/cmd/kubeadm/app/phases/upgrade/staticpods_test.go +++ b/cmd/kubeadm/app/phases/upgrade/staticpods_test.go @@ -215,14 +215,27 @@ func (c fakeTLSEtcdClient) HasTLS() bool { return c.TLS } -func (c fakeTLSEtcdClient) GetStatus() (*clientv3.StatusResponse, error) { - client := &clientv3.StatusResponse{} - client.Version = "3.1.12" - return client, nil +func (c fakeTLSEtcdClient) ClusterAvailable() (bool, error) { return true, nil } + +func (c fakeTLSEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) { + return true, nil } -func (c fakeTLSEtcdClient) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) { - return c.GetStatus() +func (c fakeTLSEtcdClient) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) { + return map[string]*clientv3.StatusResponse{ + "foo": { + Version: "3.1.12", + }}, nil +} + +func (c fakeTLSEtcdClient) GetClusterVersions() (map[string]string, error) { + return map[string]string{ + "foo": "3.1.12", + }, nil +} + +func (c fakeTLSEtcdClient) GetVersion() (string, error) { + return "3.1.12", nil } type fakePodManifestEtcdClient struct{ ManifestDir, CertificatesDir string } @@ -232,7 +245,13 @@ func (c fakePodManifestEtcdClient) HasTLS() bool { return hasTLS } -func (c 
fakePodManifestEtcdClient) GetStatus() (*clientv3.StatusResponse, error) { +func (c fakePodManifestEtcdClient) ClusterAvailable() (bool, error) { return true, nil } + +func (c fakePodManifestEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) { + return true, nil +} + +func (c fakePodManifestEtcdClient) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) { // Make sure the certificates generated from the upgrade are readable from disk tlsInfo := transport.TLSInfo{ CertFile: filepath.Join(c.CertificatesDir, constants.EtcdCACertName), @@ -244,13 +263,19 @@ func (c fakePodManifestEtcdClient) GetStatus() (*clientv3.StatusResponse, error) return nil, err } - client := &clientv3.StatusResponse{} - client.Version = "3.1.12" - return client, nil + return map[string]*clientv3.StatusResponse{ + "foo": {Version: "3.1.12"}, + }, nil } -func (c fakePodManifestEtcdClient) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) { - return c.GetStatus() +func (c fakePodManifestEtcdClient) GetClusterVersions() (map[string]string, error) { + return map[string]string{ + "foo": "3.1.12", + }, nil +} + +func (c fakePodManifestEtcdClient) GetVersion() (string, error) { + return "3.1.12", nil } func TestStaticPodControlPlane(t *testing.T) { diff --git a/cmd/kubeadm/app/util/etcd/etcd.go b/cmd/kubeadm/app/util/etcd/etcd.go index 9eec25c4595..3bd3e8034a7 100644 --- a/cmd/kubeadm/app/util/etcd/etcd.go +++ b/cmd/kubeadm/app/util/etcd/etcd.go @@ -30,22 +30,25 @@ import ( "k8s.io/kubernetes/cmd/kubeadm/app/util/staticpod" ) -// Client is an interface to get etcd cluster related information -type Client interface { - GetStatus() (*clientv3.StatusResponse, error) - WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) +// ClusterInterrogator is an interface to get etcd cluster related information +type 
ClusterInterrogator interface { + ClusterAvailable() (bool, error) + GetClusterStatus() (map[string]*clientv3.StatusResponse, error) + GetClusterVersions() (map[string]string, error) + GetVersion() (string, error) HasTLS() bool + WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) } -// GenericClient is a common etcd client for supported etcd servers -type GenericClient struct { +// Client provides connection parameters for an etcd cluster +type Client struct { Endpoints []string - TLSConfig *tls.Config + TLS *tls.Config } // HasTLS returns true if etcd is configured for TLS -func (c GenericClient) HasTLS() bool { - return c.TLSConfig != nil +func (c Client) HasTLS() bool { + return c.TLS != nil } // PodManifestsHaveTLS reads the etcd staticpod manifest from disk and returns false if the TLS flags @@ -83,31 +86,118 @@ FlagLoop: return true, nil } -// GetStatus gets server status -func (c GenericClient) GetStatus() (*clientv3.StatusResponse, error) { - const dialTimeout = 5 * time.Second +// New creates a new etcd Client for the given endpoints, configuring TLS when any of ca, cert or key is non-empty +func New(endpoints []string, ca, cert, key string) (*Client, error) { + client := Client{Endpoints: endpoints} + + if ca != "" || cert != "" || key != "" { + tlsInfo := transport.TLSInfo{ + CertFile: cert, + KeyFile: key, + TrustedCAFile: ca, + } + tlsConfig, err := tlsInfo.ClientConfig() + if err != nil { + return nil, err + } + client.TLS = tlsConfig + } + + return &client, nil +} + +// NewFromStaticPod creates a Client from the given endpoints, manifestDir, and certificatesDir +func NewFromStaticPod(endpoints []string, manifestDir string, certificatesDir string) (*Client, error) { + hasTLS, err := PodManifestsHaveTLS(manifestDir) + if err != nil { + return nil, fmt.Errorf("could not read manifests from: %s, error: %v", manifestDir, err) + } + if hasTLS { + return New( + endpoints, + filepath.Join(certificatesDir, constants.EtcdCACertName), + filepath.Join(certificatesDir, 
constants.EtcdHealthcheckClientCertName), + filepath.Join(certificatesDir, constants.EtcdHealthcheckClientKeyName), + ) + } + return New(endpoints, "", "", "") +} + +// GetVersion returns the etcd version of the cluster. +// An error is returned if the versions of the endpoints do not all match, or if no version could be determined +func (c Client) GetVersion() (string, error) { + var clusterVersion string + + versions, err := c.GetClusterVersions() + if err != nil { + return "", err + } + for _, v := range versions { + if clusterVersion == "" { + // This is the first version we've seen + clusterVersion = v + } else if v != clusterVersion { + return "", fmt.Errorf("etcd cluster contains endpoints with mismatched versions: %v", versions) + } else { + clusterVersion = v + } + } + if clusterVersion == "" { + return "", fmt.Errorf("could not determine cluster etcd version") + } + return clusterVersion, nil +} + +// GetClusterVersions returns a map of the endpoints and their associated versions +func (c Client) GetClusterVersions() (map[string]string, error) { + versions := make(map[string]string) + statuses, err := c.GetClusterStatus() + if err != nil { + return versions, err + } + + for ep, status := range statuses { + versions[ep] = status.Version + } + return versions, nil +} + +// ClusterAvailable returns true if the cluster status indicates the cluster is available. 
+func (c Client) ClusterAvailable() (bool, error) { + _, err := c.GetClusterStatus() + if err != nil { + return false, err + } + return true, nil +} + +// GetClusterStatus returns the status response of every configured endpoint, keyed by endpoint, or an error if the status of any endpoint cannot be retrieved +func (c Client) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) { cli, err := clientv3.New(clientv3.Config{ Endpoints: c.Endpoints, - DialTimeout: dialTimeout, - TLS: c.TLSConfig, + DialTimeout: 5 * time.Second, + TLS: c.TLS, }) if err != nil { return nil, err } defer cli.Close() - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - resp, err := cli.Status(ctx, c.Endpoints[0]) - cancel() - if err != nil { - return nil, err + clusterStatus := make(map[string]*clientv3.StatusResponse) + for _, ep := range c.Endpoints { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + resp, err := cli.Status(ctx, ep) + cancel() + if err != nil { + return nil, err + } + clusterStatus[ep] = resp } - - return resp, nil + return clusterStatus, nil } -// WaitForStatus returns a StatusResponse after an initial delay and retry attempts -func (c GenericClient) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) { +// WaitForClusterAvailable returns true if all endpoints in the cluster are available after an initial delay and retry attempts, an error is returned otherwise +func (c Client) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) { fmt.Printf("[util/etcd] Waiting %v for initial delay\n", delay) time.Sleep(delay) for i := 0; i < retries; i++ { @@ -115,8 +205,8 @@ func (c GenericClient) WaitForStatus(delay time.Duration, retries int, retryInte fmt.Printf("[util/etcd] Waiting %v until next retry\n", retryInterval) time.Sleep(retryInterval) } - fmt.Printf("[util/etcd] Attempting to get etcd status %d/%d\n", i+1, retries) - resp, err := c.GetStatus() + fmt.Printf("[util/etcd] Attempting to see if 
all cluster endpoints are available %d/%d\n", i+1, retries) + resp, err := c.ClusterAvailable() if err != nil { switch err { case context.DeadlineExceeded: @@ -128,42 +218,5 @@ func (c GenericClient) WaitForStatus(delay time.Duration, retries int, retryInte } return resp, nil } - return nil, fmt.Errorf("timeout waiting for etcd cluster status") -} - -// NewClient creates a new EtcdCluster client -func NewClient(endpoints []string, caFile string, certFile string, keyFile string) (*GenericClient, error) { - client := GenericClient{Endpoints: endpoints} - - if caFile != "" || certFile != "" || keyFile != "" { - tlsInfo := transport.TLSInfo{ - CertFile: certFile, - KeyFile: keyFile, - TrustedCAFile: caFile, - } - tlsConfig, err := tlsInfo.ClientConfig() - if err != nil { - return nil, err - } - client.TLSConfig = tlsConfig - } - - return &client, nil -} - -// NewStaticPodClient creates a GenericClient from the given endpoints, manifestDir, and certificatesDir -func NewStaticPodClient(endpoints []string, manifestDir string, certificatesDir string) (*GenericClient, error) { - hasTLS, err := PodManifestsHaveTLS(manifestDir) - if err != nil { - return nil, fmt.Errorf("could not read manifests from: %s, error: %v", manifestDir, err) - } - if hasTLS { - return NewClient( - endpoints, - filepath.Join(certificatesDir, constants.EtcdCACertName), - filepath.Join(certificatesDir, constants.EtcdHealthcheckClientCertName), - filepath.Join(certificatesDir, constants.EtcdHealthcheckClientKeyName), - ) - } - return NewClient(endpoints, "", "", "") + return false, fmt.Errorf("timeout waiting for etcd cluster to be available") }