Merge pull request #63495 from detiber/external_etcd_upgrade

Automatic merge from submit-queue (batch tested with PRs 63792, 63495, 63742, 63332, 63779). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

kubeadm - fix upgrades with external etcd

**What this PR does / why we need it**:

- Allow for upgrade plan and upgrade apply to work with external etcd
  - https://github.com/kubernetes/kubeadm/issues/727
  - https://github.com/kubernetes/kubernetes/pull/62141

- Update upgrade plan output when configured for external etcd
  - Move etcd to a separate section and show available upgrades

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes https://github.com/kubernetes/kubeadm/issues/727

**Release note**:
```release-note
kubeadm upgrade now supports external etcd setups again
```

I created a gist documenting the manual testing I've been doing for this PR here: https://gist.github.com/detiber/e18d907c41901fbb5e12ffa1af5750f8
This commit is contained in:
Kubernetes Submit Queue 2018-05-15 09:04:20 -07:00 committed by GitHub
commit 5788d4de1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 375 additions and 132 deletions

View File

@ -89,14 +89,31 @@ func RunPlan(flags *planFlags) error {
return err
}
// Define Local Etcd cluster to be able to retrieve information
etcdClient, err := etcdutil.NewStaticPodClient(
[]string{"localhost:2379"},
constants.GetStaticPodDirectory(),
upgradeVars.cfg.CertificatesDir,
)
if err != nil {
return err
var etcdClient etcdutil.ClusterInterrogator
// Currently this is the only method we have for distinguishing
// external etcd vs static pod etcd
isExternalEtcd := len(upgradeVars.cfg.Etcd.Endpoints) > 0
if isExternalEtcd {
client, err := etcdutil.New(
upgradeVars.cfg.Etcd.Endpoints,
upgradeVars.cfg.Etcd.CAFile,
upgradeVars.cfg.Etcd.CertFile,
upgradeVars.cfg.Etcd.KeyFile)
if err != nil {
return err
}
etcdClient = client
} else {
client, err := etcdutil.NewFromStaticPod(
[]string{"localhost:2379"},
constants.GetStaticPodDirectory(),
upgradeVars.cfg.CertificatesDir,
)
if err != nil {
return err
}
etcdClient = client
}
// Compute which upgrade possibilities there are
@ -107,13 +124,13 @@ func RunPlan(flags *planFlags) error {
}
// Tell the user which upgrades are available
printAvailableUpgrades(availUpgrades, os.Stdout, upgradeVars.cfg.FeatureGates)
printAvailableUpgrades(availUpgrades, os.Stdout, upgradeVars.cfg.FeatureGates, isExternalEtcd)
return nil
}
// printAvailableUpgrades prints a UX-friendly overview of what versions are available to upgrade to
// TODO look into columnize or some other formatter when time permits instead of using the tabwriter
func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGates map[string]bool) {
func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGates map[string]bool, isExternalEtcd bool) {
// Return quickly if no upgrades can be made
if len(upgrades) == 0 {
@ -126,6 +143,16 @@ func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGate
// Loop through the upgrade possibilities and output text to the command line
for _, upgrade := range upgrades {
if isExternalEtcd && upgrade.CanUpgradeEtcd() {
fmt.Fprintln(w, "External components that should be upgraded manually before you upgrade the control plane with 'kubeadm upgrade apply':")
fmt.Fprintln(tabw, "COMPONENT\tCURRENT\tAVAILABLE")
fmt.Fprintf(tabw, "Etcd\t%s\t%s\n", upgrade.Before.EtcdVersion, upgrade.After.EtcdVersion)
// We should flush the writer here at this stage; as the columns will now be of the right size, adjusted to the above content
tabw.Flush()
fmt.Fprintln(w, "")
}
if upgrade.CanUpgradeKubelets() {
fmt.Fprintln(w, "Components that must be upgraded manually after you have upgraded the control plane with 'kubeadm upgrade apply':")
fmt.Fprintln(tabw, "COMPONENT\tCURRENT\tAVAILABLE")
@ -160,7 +187,9 @@ func printAvailableUpgrades(upgrades []upgrade.Upgrade, w io.Writer, featureGate
} else {
fmt.Fprintf(tabw, "Kube DNS\t%s\t%s\n", upgrade.Before.DNSVersion, upgrade.After.DNSVersion)
}
fmt.Fprintf(tabw, "Etcd\t%s\t%s\n", upgrade.Before.EtcdVersion, upgrade.After.EtcdVersion)
if !isExternalEtcd {
fmt.Fprintf(tabw, "Etcd\t%s\t%s\n", upgrade.Before.EtcdVersion, upgrade.After.EtcdVersion)
}
// The tabwriter should be flushed at this stage as we have now put in all the required content for this time. This is required for the tabs' size to be correct.
tabw.Flush()

View File

@ -62,16 +62,27 @@ func TestSortedSliceFromStringIntMap(t *testing.T) {
func TestPrintAvailableUpgrades(t *testing.T) {
featureGates := make(map[string]bool)
var tests = []struct {
name string
upgrades []upgrade.Upgrade
buf *bytes.Buffer
expectedBytes []byte
externalEtcd bool
}{
{
name: "Up to date",
upgrades: []upgrade.Upgrade{},
expectedBytes: []byte(`Awesome, you're up-to-date! Enjoy!
`),
},
{
name: "Up to date external etcd",
externalEtcd: true,
upgrades: []upgrade.Upgrade{},
expectedBytes: []byte(`Awesome, you're up-to-date! Enjoy!
`),
},
{
name: "Patch version available",
upgrades: []upgrade.Upgrade{
{
Description: "version in the v1.8 series",
@ -117,6 +128,7 @@ _____________________________________________________________________
`),
},
{
name: "minor version available",
upgrades: []upgrade.Upgrade{
{
Description: "stable version",
@ -160,6 +172,7 @@ _____________________________________________________________________
`),
},
{
name: "patch and minor version available",
upgrades: []upgrade.Upgrade{
{
Description: "version in the v1.8 series",
@ -243,6 +256,7 @@ _____________________________________________________________________
`),
},
{
name: "experimental version available",
upgrades: []upgrade.Upgrade{
{
Description: "experimental version",
@ -288,6 +302,7 @@ _____________________________________________________________________
`),
},
{
name: "release candidate available",
upgrades: []upgrade.Upgrade{
{
Description: "release candidate version",
@ -333,6 +348,7 @@ _____________________________________________________________________
`),
},
{
name: "multiple kubelet versions",
upgrades: []upgrade.Upgrade{
{
Description: "version in the v1.9 series",
@ -377,19 +393,71 @@ Note: Before you can perform this upgrade, you have to update kubeadm to v1.9.3.
_____________________________________________________________________
`),
},
{
name: "external etcd upgrade available",
upgrades: []upgrade.Upgrade{
{
Description: "version in the v1.9 series",
Before: upgrade.ClusterState{
KubeVersion: "v1.9.2",
KubeletVersions: map[string]uint16{
"v1.9.2": 1,
},
KubeadmVersion: "v1.9.2",
DNSVersion: "1.14.5",
EtcdVersion: "3.0.17",
},
After: upgrade.ClusterState{
KubeVersion: "v1.9.3",
KubeadmVersion: "v1.9.3",
DNSVersion: "1.14.8",
EtcdVersion: "3.1.12",
},
},
},
externalEtcd: true,
expectedBytes: []byte(`External components that should be upgraded manually before you upgrade the control plane with 'kubeadm upgrade apply':
COMPONENT CURRENT AVAILABLE
Etcd 3.0.17 3.1.12
Components that must be upgraded manually after you have upgraded the control plane with 'kubeadm upgrade apply':
COMPONENT CURRENT AVAILABLE
Kubelet 1 x v1.9.2 v1.9.3
Upgrade to the latest version in the v1.9 series:
COMPONENT CURRENT AVAILABLE
API Server v1.9.2 v1.9.3
Controller Manager v1.9.2 v1.9.3
Scheduler v1.9.2 v1.9.3
Kube Proxy v1.9.2 v1.9.3
Kube DNS 1.14.5 1.14.8
You can now apply the upgrade by executing the following command:
kubeadm upgrade apply v1.9.3
Note: Before you can perform this upgrade, you have to update kubeadm to v1.9.3.
_____________________________________________________________________
`),
},
}
for _, rt := range tests {
rt.buf = bytes.NewBufferString("")
printAvailableUpgrades(rt.upgrades, rt.buf, featureGates)
actualBytes := rt.buf.Bytes()
if !bytes.Equal(actualBytes, rt.expectedBytes) {
t.Errorf(
"failed PrintAvailableUpgrades:\n\texpected: %q\n\t actual: %q",
string(rt.expectedBytes),
string(actualBytes),
)
}
t.Run(rt.name, func(t *testing.T) {
rt.buf = bytes.NewBufferString("")
printAvailableUpgrades(rt.upgrades, rt.buf, featureGates, rt.externalEtcd)
actualBytes := rt.buf.Bytes()
if !bytes.Equal(actualBytes, rt.expectedBytes) {
t.Errorf(
"failed PrintAvailableUpgrades:\n\texpected: %q\n\t actual: %q",
string(rt.expectedBytes),
string(actualBytes),
)
}
})
}
}

View File

@ -50,6 +50,11 @@ func (u *Upgrade) CanUpgradeKubelets() bool {
return !sameVersionFound
}
// CanUpgradeEtcd reports whether the etcd version recorded in the Before
// cluster state differs from the one in the After cluster state. The two
// versions are compared for plain inequality, so any difference counts.
func (u *Upgrade) CanUpgradeEtcd() bool {
	current, target := u.Before.EtcdVersion, u.After.EtcdVersion
	return current != target
}
// ActiveDNSAddon returns the version of CoreDNS or kube-dns
func ActiveDNSAddon(featureGates map[string]bool) string {
if features.Enabled(featureGates, features.CoreDNS) {
@ -74,7 +79,7 @@ type ClusterState struct {
// GetAvailableUpgrades fetches all versions from the specified VersionGetter and computes which
// kinds of upgrades can be performed
func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesAllowed, rcUpgradesAllowed bool, etcdClient etcdutil.Client, featureGates map[string]bool) ([]Upgrade, error) {
func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesAllowed, rcUpgradesAllowed bool, etcdClient etcdutil.ClusterInterrogator, featureGates map[string]bool) ([]Upgrade, error) {
fmt.Println("[upgrade] Fetching available versions to upgrade to")
// Collect the upgrades kubeadm can do in this list
@ -83,13 +88,13 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA
// Get the cluster version
clusterVersionStr, clusterVersion, err := versionGetterImpl.ClusterVersion()
if err != nil {
return nil, err
return upgrades, err
}
// Get current kubeadm CLI version
kubeadmVersionStr, kubeadmVersion, err := versionGetterImpl.KubeadmVersion()
if err != nil {
return nil, err
return upgrades, err
}
// Get and output the current latest stable version
@ -103,13 +108,13 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA
// Get the kubelet versions in the cluster
kubeletVersions, err := versionGetterImpl.KubeletVersions()
if err != nil {
return nil, err
return upgrades, err
}
// Get current etcd version
etcdStatus, err := etcdClient.GetStatus()
etcdVersion, err := etcdClient.GetVersion()
if err != nil {
return nil, err
return upgrades, err
}
// Construct a descriptor for the current state of the world
@ -118,7 +123,7 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA
DNSVersion: dns.GetDNSVersion(clusterVersion, ActiveDNSAddon(featureGates)),
KubeadmVersion: kubeadmVersionStr,
KubeletVersions: kubeletVersions,
EtcdVersion: etcdStatus.Version,
EtcdVersion: etcdVersion,
}
// Do a "dumb guess" that a new minor upgrade is available just because the latest stable version is higher than the cluster version
@ -201,7 +206,7 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA
// Get and output the current latest unstable version
latestVersionStr, latestVersion, err := versionGetterImpl.VersionFromCILabel("latest", "experimental version")
if err != nil {
return nil, err
return upgrades, err
}
minorUnstable := latestVersion.Components()[1]
@ -209,7 +214,7 @@ func GetAvailableUpgrades(versionGetterImpl VersionGetter, experimentalUpgradesA
previousBranch := fmt.Sprintf("latest-1.%d", minorUnstable-1)
previousBranchLatestVersionStr, previousBranchLatestVersion, err := versionGetterImpl.VersionFromCILabel(previousBranch, "")
if err != nil {
return nil, err
return upgrades, err
}
// If that previous latest version is an RC, RCs are allowed and the cluster version is lower than the RC version, show the upgrade

View File

@ -17,11 +17,13 @@ limitations under the License.
package upgrade
import (
"fmt"
"reflect"
"testing"
"time"
"github.com/coreos/etcd/clientv3"
etcdutil "k8s.io/kubernetes/cmd/kubeadm/app/util/etcd"
versionutil "k8s.io/kubernetes/pkg/util/version"
)
@ -62,28 +64,54 @@ func (f *fakeVersionGetter) KubeletVersions() (map[string]uint16, error) {
}, nil
}
type fakeEtcdCluster struct{ TLS bool }
func (f fakeEtcdCluster) HasTLS() bool { return f.TLS }
func (f fakeEtcdCluster) GetStatus() (*clientv3.StatusResponse, error) {
client := &clientv3.StatusResponse{}
client.Version = "3.1.12"
return client, nil
type fakeEtcdClient struct {
TLS bool
mismatchedVersions bool
}
func (f fakeEtcdCluster) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) {
return f.GetStatus()
// HasTLS echoes the TLS flag the fake was constructed with.
func (f fakeEtcdClient) HasTLS() bool {
	return f.TLS
}
// ClusterAvailable always reports the fake cluster as available.
func (f fakeEtcdClient) ClusterAvailable() (bool, error) {
	return true, nil
}
// WaitForClusterAvailable ignores delay, retries and retryInterval and
// immediately reports the fake cluster as available.
func (f fakeEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
	const available = true
	return available, nil
}
// GetClusterStatus returns an empty, non-nil status map and no error.
func (f fakeEtcdClient) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) {
	statuses := map[string]*clientv3.StatusResponse{}
	return statuses, nil
}
// GetVersion returns "3.1.12" unless the fake is configured with
// mismatchedVersions, in which case it returns an error that lists the
// per-endpoint versions.
func (f fakeEtcdClient) GetVersion() (string, error) {
	// The fake's GetClusterVersions never returns an error, so it is safe to drop it here.
	versions, _ := f.GetClusterVersions()
	if !f.mismatchedVersions {
		return "3.1.12", nil
	}
	return "", fmt.Errorf("etcd cluster contains endpoints with mismatched versions: %v", versions)
}
// GetClusterVersions returns the versions of the two fake endpoints "foo" and
// "bar". Both report "3.1.12" unless mismatchedVersions is set, in which case
// "bar" reports "3.2.0" instead.
func (f fakeEtcdClient) GetClusterVersions() (map[string]string, error) {
	barVersion := "3.1.12"
	if f.mismatchedVersions {
		barVersion = "3.2.0"
	}
	return map[string]string{
		"foo": "3.1.12",
		"bar": barVersion,
	}, nil
}
func TestGetAvailableUpgrades(t *testing.T) {
featureGates := make(map[string]bool)
etcdClient := fakeEtcdClient{}
tests := []struct {
name string
vg VersionGetter
expectedUpgrades []Upgrade
allowExperimental, allowRCs bool
errExpected bool
etcdClient etcdutil.ClusterInterrogator
}{
{
name: "no action needed, already up-to-date",
@ -98,6 +126,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
expectedUpgrades: []Upgrade{},
allowExperimental: false,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "simple patch version upgrade",
@ -131,6 +160,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: false,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "no version provided to offline version getter does not change behavior",
@ -164,6 +194,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: false,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "minor version upgrade only",
@ -197,6 +228,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: false,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "both minor version upgrade and patch version upgrade available",
@ -248,6 +280,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: false,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "allow experimental upgrades, but no upgrade available",
@ -263,6 +296,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
expectedUpgrades: []Upgrade{},
allowExperimental: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "upgrade to an unstable version should be supported",
@ -297,6 +331,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "upgrade from an unstable version to an unstable version should be supported",
@ -331,6 +366,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "v1.X.0-alpha.0 should be ignored",
@ -366,6 +402,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "upgrade to an RC version should be supported",
@ -401,6 +438,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowRCs: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "it is possible (but very uncommon) that the latest version from the previous branch is an rc and the current latest version is alpha.0. In that case, show the RC",
@ -436,6 +474,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
},
allowExperimental: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "upgrade to an RC version should be supported. There may also be an even newer unstable version.",
@ -490,6 +529,22 @@ func TestGetAvailableUpgrades(t *testing.T) {
allowRCs: true,
allowExperimental: true,
errExpected: false,
etcdClient: etcdClient,
},
{
name: "Upgrades with external etcd with mismatched versions should not be allowed.",
vg: &fakeVersionGetter{
clusterVersion: "v1.9.3",
kubeletVersion: "v1.9.3",
kubeadmVersion: "v1.9.3",
stablePatchVersion: "v1.9.3",
stableVersion: "v1.9.3",
},
allowRCs: false,
allowExperimental: false,
etcdClient: fakeEtcdClient{mismatchedVersions: true},
expectedUpgrades: []Upgrade{},
errExpected: true,
},
{
name: "offline version getter",
@ -498,6 +553,7 @@ func TestGetAvailableUpgrades(t *testing.T) {
kubeletVersion: "v1.10.0",
kubeadmVersion: "v1.10.1",
}, "v1.11.1"),
etcdClient: etcdClient,
expectedUpgrades: []Upgrade{
{
Description: "version in the v1.1 series",
@ -523,16 +579,19 @@ func TestGetAvailableUpgrades(t *testing.T) {
// Instantiating a fake etcd cluster for being able to get etcd version for a corresponding
// kubernetes release.
testCluster := fakeEtcdCluster{}
for _, rt := range tests {
t.Run(rt.name, func(t *testing.T) {
actualUpgrades, actualErr := GetAvailableUpgrades(rt.vg, rt.allowExperimental, rt.allowRCs, testCluster, featureGates)
actualUpgrades, actualErr := GetAvailableUpgrades(rt.vg, rt.allowExperimental, rt.allowRCs, rt.etcdClient, featureGates)
fmt.Printf("actualErr: %v\n", actualErr)
fmt.Printf("actualErr != nil: %v\n", actualErr != nil)
fmt.Printf("errExpected: %v\n", rt.errExpected)
if (actualErr != nil) != rt.errExpected {
fmt.Printf("Hello error")
t.Errorf("failed TestGetAvailableUpgrades\n\texpected error: %t\n\tgot error: %t", rt.errExpected, (actualErr != nil))
}
if !reflect.DeepEqual(actualUpgrades, rt.expectedUpgrades) {
t.Errorf("failed TestGetAvailableUpgrades\n\texpected upgrades: %v\n\tgot: %v", rt.expectedUpgrades, actualUpgrades)
}
if (actualErr != nil) != rt.errExpected {
t.Errorf("failed TestGetAvailableUpgrades\n\texpected error: %t\n\tgot error: %t", rt.errExpected, (actualErr != nil))
}
})
}
}

View File

@ -224,14 +224,14 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
}
// performEtcdStaticPodUpgrade performs upgrade of etcd, it returns bool which indicates fatal error or not and the actual error.
func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, recoverManifests map[string]string, isTLSUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.Client) (bool, error) {
func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, recoverManifests map[string]string, isTLSUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) (bool, error) {
// Add etcd static pod spec only if external etcd is not configured
if len(cfg.Etcd.Endpoints) != 0 {
return false, fmt.Errorf("external etcd detected, won't try to change any etcd state")
}
// Checking health state of etcd before proceeding with the upgrade
etcdStatus, err := oldEtcdClient.GetStatus()
_, err := oldEtcdClient.GetClusterStatus()
if err != nil {
return true, fmt.Errorf("etcd cluster is not healthy: %v", err)
}
@ -248,9 +248,13 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
if err != nil {
return true, fmt.Errorf("failed to retrieve an etcd version for the target kubernetes version: %v", err)
}
currentEtcdVersion, err := version.ParseSemantic(etcdStatus.Version)
currentEtcdVersionStr, err := oldEtcdClient.GetVersion()
if err != nil {
return true, fmt.Errorf("failed to parse the current etcd version(%s): %v", etcdStatus.Version, err)
return true, fmt.Errorf("failed to retrieve the current etcd version: %v", err)
}
currentEtcdVersion, err := version.ParseSemantic(currentEtcdVersionStr)
if err != nil {
return true, fmt.Errorf("failed to parse the current etcd version(%s): %v", currentEtcdVersionStr, err)
}
// Comparing current etcd version with desired to catch the same version or downgrade condition and fail on them.
@ -292,7 +296,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
// Since upgrade component failed, the old etcd manifest has either been restored or was never touched
// Now we need to check the health of etcd cluster if it is up with old manifest
fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available")
if _, err := oldEtcdClient.WaitForStatus(noDelay, retries, retryInterval); err != nil {
if _, err := oldEtcdClient.WaitForClusterAvailable(noDelay, retries, retryInterval); err != nil {
fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err)
// At this point we know that etcd cluster is dead and it is safe to copy backup datastore and to rollback old etcd manifest
@ -305,7 +309,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
// Now that we've rolled back the data, let's check if the cluster comes up
fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available")
if _, err := oldEtcdClient.WaitForStatus(noDelay, retries, retryInterval); err != nil {
if _, err := oldEtcdClient.WaitForClusterAvailable(noDelay, retries, retryInterval); err != nil {
fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err)
// Nothing else left to try to recover etcd cluster
return true, fmt.Errorf("fatal error rolling back local etcd cluster manifest: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
@ -321,7 +325,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
// Initialize the new etcd client if it wasn't pre-initialized
if newEtcdClient == nil {
client, err := etcdutil.NewStaticPodClient(
client, err := etcdutil.NewFromStaticPod(
[]string{"localhost:2379"},
constants.GetStaticPodDirectory(),
cfg.CertificatesDir,
@ -334,7 +338,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
// Checking health state of etcd after the upgrade
fmt.Println("[upgrade/etcd] Waiting for etcd to become available")
if _, err = newEtcdClient.WaitForStatus(podRestartDelay, retries, retryInterval); err != nil {
if _, err = newEtcdClient.WaitForClusterAvailable(podRestartDelay, retries, retryInterval); err != nil {
fmt.Printf("[upgrade/etcd] Failed to healthcheck etcd: %v\n", err)
// Despite the fact that upgradeComponent was successful, there is something wrong with the etcd cluster
// First step is to restore back up of datastore
@ -352,7 +356,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
// Assuming rollback of the old etcd manifest was successful, check the status of etcd cluster again
fmt.Println("[upgrade/etcd] Waiting for previous etcd to become available")
if _, err := oldEtcdClient.WaitForStatus(noDelay, retries, retryInterval); err != nil {
if _, err := oldEtcdClient.WaitForClusterAvailable(noDelay, retries, retryInterval); err != nil {
fmt.Printf("[upgrade/etcd] Failed to healthcheck previous etcd: %v\n", err)
// Nothing else left to try to recover etcd cluster
return true, fmt.Errorf("fatal error rolling back local etcd cluster manifest: %v, the backup of etcd database is stored here:(%s)", err, backupEtcdDir)
@ -367,7 +371,7 @@ func performEtcdStaticPodUpgrade(waiter apiclient.Waiter, pathMgr StaticPodPathM
}
// StaticPodControlPlane upgrades a static pod-hosted control plane
func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, etcdUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.Client) error {
func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.MasterConfiguration, etcdUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) error {
recoverManifests := map[string]string{}
var isTLSUpgrade bool
var isExternalEtcd bool
@ -381,7 +385,7 @@ func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager
if len(cfg.Etcd.Endpoints) > 0 {
// External etcd
isExternalEtcd = true
client, err := etcdutil.NewClient(
client, err := etcdutil.New(
cfg.Etcd.Endpoints,
cfg.Etcd.CAFile,
cfg.Etcd.CertFile,
@ -397,7 +401,7 @@ func StaticPodControlPlane(waiter apiclient.Waiter, pathMgr StaticPodPathManager
}
} else {
// etcd Static Pod
client, err := etcdutil.NewStaticPodClient(
client, err := etcdutil.NewFromStaticPod(
[]string{"localhost:2379"},
constants.GetStaticPodDirectory(),
cfg.CertificatesDir,

View File

@ -215,14 +215,27 @@ func (c fakeTLSEtcdClient) HasTLS() bool {
return c.TLS
}
func (c fakeTLSEtcdClient) GetStatus() (*clientv3.StatusResponse, error) {
client := &clientv3.StatusResponse{}
client.Version = "3.1.12"
return client, nil
// ClusterAvailable always reports the fake cluster as available.
func (c fakeTLSEtcdClient) ClusterAvailable() (bool, error) {
	return true, nil
}
// WaitForClusterAvailable ignores delay, retries and retryInterval and
// immediately reports the fake cluster as available.
func (c fakeTLSEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
	const available = true
	return available, nil
}
func (c fakeTLSEtcdClient) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) {
return c.GetStatus()
// GetClusterStatus returns a single-entry status map in which the endpoint
// "foo" reports etcd version "3.1.12".
func (c fakeTLSEtcdClient) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) {
	fooStatus := &clientv3.StatusResponse{Version: "3.1.12"}
	statuses := map[string]*clientv3.StatusResponse{"foo": fooStatus}
	return statuses, nil
}
// GetClusterVersions returns a single-entry map in which the endpoint "foo"
// reports version "3.1.12".
func (c fakeTLSEtcdClient) GetClusterVersions() (map[string]string, error) {
	versions := map[string]string{"foo": "3.1.12"}
	return versions, nil
}
// GetVersion always reports the fixed etcd version "3.1.12".
func (c fakeTLSEtcdClient) GetVersion() (string, error) {
	const version = "3.1.12"
	return version, nil
}
type fakePodManifestEtcdClient struct{ ManifestDir, CertificatesDir string }
@ -232,7 +245,13 @@ func (c fakePodManifestEtcdClient) HasTLS() bool {
return hasTLS
}
func (c fakePodManifestEtcdClient) GetStatus() (*clientv3.StatusResponse, error) {
// ClusterAvailable always reports the fake cluster as available.
func (c fakePodManifestEtcdClient) ClusterAvailable() (bool, error) {
	return true, nil
}
// WaitForClusterAvailable ignores delay, retries and retryInterval and
// immediately reports the fake cluster as available.
func (c fakePodManifestEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
	const available = true
	return available, nil
}
func (c fakePodManifestEtcdClient) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) {
// Make sure the certificates generated from the upgrade are readable from disk
tlsInfo := transport.TLSInfo{
CertFile: filepath.Join(c.CertificatesDir, constants.EtcdCACertName),
@ -244,13 +263,19 @@ func (c fakePodManifestEtcdClient) GetStatus() (*clientv3.StatusResponse, error)
return nil, err
}
client := &clientv3.StatusResponse{}
client.Version = "3.1.12"
return client, nil
return map[string]*clientv3.StatusResponse{
"foo": {Version: "3.1.12"},
}, nil
}
func (c fakePodManifestEtcdClient) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) {
return c.GetStatus()
// GetClusterVersions returns a single-entry map in which the endpoint "foo"
// reports version "3.1.12".
func (c fakePodManifestEtcdClient) GetClusterVersions() (map[string]string, error) {
	versions := map[string]string{"foo": "3.1.12"}
	return versions, nil
}
// GetVersion always reports the fixed etcd version "3.1.12".
func (c fakePodManifestEtcdClient) GetVersion() (string, error) {
	const version = "3.1.12"
	return version, nil
}
func TestStaticPodControlPlane(t *testing.T) {

View File

@ -30,22 +30,25 @@ import (
"k8s.io/kubernetes/cmd/kubeadm/app/util/staticpod"
)
// Client is an interface to get etcd cluster related information
type Client interface {
GetStatus() (*clientv3.StatusResponse, error)
WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error)
// ClusterInterrogator is an interface to get etcd cluster related information:
// whether the connection uses TLS, whether the cluster is reachable, and the
// status and version reported by its endpoints.
type ClusterInterrogator interface {
	// HasTLS returns true if etcd is configured for TLS.
	HasTLS() bool

	// ClusterAvailable returns true if the cluster status could be retrieved.
	ClusterAvailable() (bool, error)
	// WaitForClusterAvailable checks for cluster availability after an
	// initial delay, using up to retries attempts spaced by retryInterval.
	WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error)

	// GetClusterStatus returns the status response of each endpoint, keyed by endpoint.
	GetClusterStatus() (map[string]*clientv3.StatusResponse, error)
	// GetClusterVersions returns the version of each endpoint, keyed by endpoint.
	GetClusterVersions() (map[string]string, error)
	// GetVersion returns the single etcd version of the cluster.
	GetVersion() (string, error)
}
// GenericClient is a common etcd client for supported etcd servers
type GenericClient struct {
// Client provides connection parameters for an etcd cluster
type Client struct {
Endpoints []string
TLSConfig *tls.Config
TLS *tls.Config
}
// HasTLS returns true if etcd is configured for TLS
func (c GenericClient) HasTLS() bool {
return c.TLSConfig != nil
func (c Client) HasTLS() bool {
return c.TLS != nil
}
// PodManifestsHaveTLS reads the etcd staticpod manifest from disk and returns false if the TLS flags
@ -83,31 +86,118 @@ FlagLoop:
return true, nil
}
// GetStatus gets server status
func (c GenericClient) GetStatus() (*clientv3.StatusResponse, error) {
const dialTimeout = 5 * time.Second
// New creates a Client for the given etcd endpoints. When at least one of ca,
// cert or key is non-empty, a TLS client configuration is built from them and
// stored on the client; when all three are empty the client is returned
// without any TLS configuration. An error is returned if the TLS
// configuration cannot be built.
func New(endpoints []string, ca, cert, key string) (*Client, error) {
	etcd := &Client{Endpoints: endpoints}

	// No TLS material supplied at all: nothing more to configure.
	if ca == "" && cert == "" && key == "" {
		return etcd, nil
	}

	info := transport.TLSInfo{
		TrustedCAFile: ca,
		CertFile:      cert,
		KeyFile:       key,
	}
	cfg, err := info.ClientConfig()
	if err != nil {
		return nil, err
	}
	etcd.TLS = cfg
	return etcd, nil
}
// NewFromStaticPod creates a Client from the given endpoints, manifestDir, and
// certificatesDir. PodManifestsHaveTLS is consulted for manifestDir: when it
// reports TLS, the client is built with the etcd CA certificate and the
// healthcheck client certificate/key located under certificatesDir; otherwise
// the client is built without any TLS files.
func NewFromStaticPod(endpoints []string, manifestDir string, certificatesDir string) (*Client, error) {
	tlsEnabled, err := PodManifestsHaveTLS(manifestDir)
	if err != nil {
		return nil, fmt.Errorf("could not read manifests from: %s, error: %v", manifestDir, err)
	}
	if !tlsEnabled {
		return New(endpoints, "", "", "")
	}

	caPath := filepath.Join(certificatesDir, constants.EtcdCACertName)
	certPath := filepath.Join(certificatesDir, constants.EtcdHealthcheckClientCertName)
	keyPath := filepath.Join(certificatesDir, constants.EtcdHealthcheckClientKeyName)
	return New(endpoints, caPath, certPath, keyPath)
}
// GetVersion returns the etcd version of the cluster.
// It collects the version of every endpoint via GetClusterVersions and
// returns an error if that lookup fails, if the endpoints do not all report
// the same version, or if no version could be determined at all.
func (c Client) GetVersion() (string, error) {
	versions, err := c.GetClusterVersions()
	if err != nil {
		return "", err
	}

	var agreed string
	for _, reported := range versions {
		switch {
		case agreed == "":
			// Nothing recorded yet: adopt this endpoint's version.
			agreed = reported
		case reported != agreed:
			return "", fmt.Errorf("etcd cluster contains endpoints with mismatched versions: %v", versions)
		}
	}

	if agreed == "" {
		return "", fmt.Errorf("could not determine cluster etcd version")
	}
	return agreed, nil
}
// GetClusterVersions returns a map of the endpoints and their associated
// versions, taken from the per-endpoint results of GetClusterStatus. If the
// status lookup fails, the error is returned together with an empty map.
func (c Client) GetClusterVersions() (map[string]string, error) {
	byEndpoint := map[string]string{}

	statuses, err := c.GetClusterStatus()
	if err != nil {
		return byEndpoint, err
	}

	for endpoint, st := range statuses {
		byEndpoint[endpoint] = st.Version
	}
	return byEndpoint, nil
}
// ClusterAvailable returns true if the cluster status could be retrieved via
// GetClusterStatus; otherwise it returns false together with the error from
// that call.
func (c Client) ClusterAvailable() (bool, error) {
	if _, err := c.GetClusterStatus(); err != nil {
		return false, err
	}
	return true, nil
}
// GetClusterStatus returns the status response of every configured endpoint, keyed by endpoint, or an error if any endpoint's status cannot be retrieved
func (c Client) GetClusterStatus() (map[string]*clientv3.StatusResponse, error) {
cli, err := clientv3.New(clientv3.Config{
Endpoints: c.Endpoints,
DialTimeout: dialTimeout,
TLS: c.TLSConfig,
DialTimeout: 5 * time.Second,
TLS: c.TLS,
})
if err != nil {
return nil, err
}
defer cli.Close()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
resp, err := cli.Status(ctx, c.Endpoints[0])
cancel()
if err != nil {
return nil, err
clusterStatus := make(map[string]*clientv3.StatusResponse)
for _, ep := range c.Endpoints {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
resp, err := cli.Status(ctx, ep)
cancel()
if err != nil {
return nil, err
}
clusterStatus[ep] = resp
}
return resp, nil
return clusterStatus, nil
}
// WaitForStatus returns a StatusResponse after an initial delay and retry attempts
func (c GenericClient) WaitForStatus(delay time.Duration, retries int, retryInterval time.Duration) (*clientv3.StatusResponse, error) {
// WaitForClusterAvailable returns true if all endpoints in the cluster are available after an initial delay and retry attempts, an error is returned otherwise
func (c Client) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
fmt.Printf("[util/etcd] Waiting %v for initial delay\n", delay)
time.Sleep(delay)
for i := 0; i < retries; i++ {
@ -115,8 +205,8 @@ func (c GenericClient) WaitForStatus(delay time.Duration, retries int, retryInte
fmt.Printf("[util/etcd] Waiting %v until next retry\n", retryInterval)
time.Sleep(retryInterval)
}
fmt.Printf("[util/etcd] Attempting to get etcd status %d/%d\n", i+1, retries)
resp, err := c.GetStatus()
fmt.Printf("[util/etcd] Attempting to see if all cluster endpoints are available %d/%d\n", i+1, retries)
resp, err := c.ClusterAvailable()
if err != nil {
switch err {
case context.DeadlineExceeded:
@ -128,42 +218,5 @@ func (c GenericClient) WaitForStatus(delay time.Duration, retries int, retryInte
}
return resp, nil
}
return nil, fmt.Errorf("timeout waiting for etcd cluster status")
}
// NewClient creates a new EtcdCluster client
func NewClient(endpoints []string, caFile string, certFile string, keyFile string) (*GenericClient, error) {
client := GenericClient{Endpoints: endpoints}
if caFile != "" || certFile != "" || keyFile != "" {
tlsInfo := transport.TLSInfo{
CertFile: certFile,
KeyFile: keyFile,
TrustedCAFile: caFile,
}
tlsConfig, err := tlsInfo.ClientConfig()
if err != nil {
return nil, err
}
client.TLSConfig = tlsConfig
}
return &client, nil
}
// NewStaticPodClient creates a GenericClient from the given endpoints, manifestDir, and certificatesDir
func NewStaticPodClient(endpoints []string, manifestDir string, certificatesDir string) (*GenericClient, error) {
hasTLS, err := PodManifestsHaveTLS(manifestDir)
if err != nil {
return nil, fmt.Errorf("could not read manifests from: %s, error: %v", manifestDir, err)
}
if hasTLS {
return NewClient(
endpoints,
filepath.Join(certificatesDir, constants.EtcdCACertName),
filepath.Join(certificatesDir, constants.EtcdHealthcheckClientCertName),
filepath.Join(certificatesDir, constants.EtcdHealthcheckClientKeyName),
)
}
return NewClient(endpoints, "", "", "")
return false, fmt.Errorf("timeout waiting for etcd cluster to be available")
}