From f180a3f265e57a7b18eaff33cd93375248f29e63 Mon Sep 17 00:00:00 2001 From: Dave Chen Date: Tue, 5 Jul 2022 17:22:47 +0800 Subject: [PATCH 1/2] Move the logic of file cleanup within each phase Guarantee that stale files are removed if end user resets cluster by resetting each phase. Signed-off-by: Dave Chen --- .../app/cmd/phases/reset/cleanupnode.go | 14 ++++---- .../app/cmd/phases/reset/cleanupnode_test.go | 6 +++- cmd/kubeadm/app/cmd/phases/reset/data.go | 1 - .../app/cmd/phases/reset/removeetcdmember.go | 9 ++++-- cmd/kubeadm/app/cmd/reset.go | 32 +------------------ 5 files changed, 19 insertions(+), 43 deletions(-) diff --git a/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go b/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go index 70d0ea11ee2..c00296fd4a6 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go +++ b/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go @@ -50,6 +50,7 @@ func NewCleanupNodePhase() workflow.Phase { } func runCleanupNode(c workflow.RunData) error { + dirsToClean := []string{filepath.Join(kubeadmconstants.KubernetesDir, kubeadmconstants.ManifestsSubDirName)} r, ok := c.(resetData) if !ok { return errors.New("cleanup-node phase invoked with an invalid data struct") @@ -81,7 +82,7 @@ func runCleanupNode(c workflow.RunData) error { kubeletRunDir, err := absoluteKubeletRunDirectory() if err == nil { // Only clean absoluteKubeletRunDirectory if umountDirsCmd passed without error - r.AddDirsToClean(kubeletRunDir) + dirsToClean = append(dirsToClean, kubeletRunDir) } } else { fmt.Printf("[reset] Would unmount mounted directories in %q\n", kubeadmconstants.KubeletRunDirectory) @@ -100,7 +101,8 @@ func runCleanupNode(c workflow.RunData) error { if certsDir != kubeadmapiv1.DefaultCertificatesDir { klog.Warningf("[reset] WARNING: Cleaning a non-default certificates directory: %q\n", certsDir) } - resetConfigDir(kubeadmconstants.KubernetesDir, certsDir, r.DryRun()) + dirsToClean = append(dirsToClean, certsDir) + resetConfigDir(kubeadmconstants.KubernetesDir, dirsToClean, r.DryRun()) if r.Cfg() != nil && features.Enabled(r.Cfg().FeatureGates, features.RootlessControlPlane) { if !r.DryRun() { @@ -142,12 +144,8 @@ func removeContainers(execer utilsexec.Interface, criSocketPath string) error { return containerRuntime.RemoveContainers(containers) } -// resetConfigDir is used to cleanup the files kubeadm writes in /etc/kubernetes/. -func resetConfigDir(configPathDir, pkiPathDir string, isDryRun bool) { - dirsToClean := []string{ - filepath.Join(configPathDir, kubeadmconstants.ManifestsSubDirName), - pkiPathDir, - } +// resetConfigDir is used to cleanup the files in the folder defined in dirsToClean. +func resetConfigDir(configPathDir string, dirsToClean []string, isDryRun bool) { if !isDryRun { fmt.Printf("[reset] Deleting contents of directories: %v\n", dirsToClean) for _, dir := range dirsToClean { diff --git a/cmd/kubeadm/app/cmd/phases/reset/cleanupnode_test.go b/cmd/kubeadm/app/cmd/phases/reset/cleanupnode_test.go index 044853c8b84..ed7d374893d 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/cleanupnode_test.go +++ b/cmd/kubeadm/app/cmd/phases/reset/cleanupnode_test.go @@ -173,7 +173,11 @@ func TestConfigDirCleaner(t *testing.T) { if test.resetDir == "" { test.resetDir = "pki" } - resetConfigDir(tmpDir, filepath.Join(tmpDir, test.resetDir), false) + dirsToClean := []string{ + filepath.Join(tmpDir, test.resetDir), + filepath.Join(tmpDir, kubeadmconstants.ManifestsSubDirName), + } + resetConfigDir(tmpDir, dirsToClean, false) // Verify the files we cleanup implicitly in every test: assertExists(t, tmpDir) diff --git a/cmd/kubeadm/app/cmd/phases/reset/data.go b/cmd/kubeadm/app/cmd/phases/reset/data.go index 9862b2f262b..f85848900ec 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/data.go +++ b/cmd/kubeadm/app/cmd/phases/reset/data.go @@ -34,7 +34,6 @@ type resetData interface { Cfg() *kubeadmapi.InitConfiguration DryRun() bool Client() clientset.Interface - AddDirsToClean(dirs ...string) CertificatesDir() string CRISocketPath() string } diff --git a/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go b/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go index 7eae4f2f8fe..6e854cdc616 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go +++ b/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go @@ -56,14 +56,19 @@ func runRemoveETCDMemberPhase(c workflow.RunData) error { etcdManifestPath := filepath.Join(kubeadmconstants.KubernetesDir, kubeadmconstants.ManifestsSubDirName, "etcd.yaml") etcdDataDir, err := getEtcdDataDir(etcdManifestPath, cfg) if err == nil { - r.AddDirsToClean(etcdDataDir) if cfg != nil { if !r.DryRun() { - if err := etcdphase.RemoveStackedEtcdMemberFromCluster(r.Client(), cfg); err != nil { + err := etcdphase.RemoveStackedEtcdMemberFromCluster(r.Client(), cfg) + if err != nil { klog.Warningf("[reset] Failed to remove etcd member: %v, please manually remove this etcd member using etcdctl", err) + } else { + if err := CleanDir(etcdDataDir); err != nil { + klog.Warningf("[reset] Failed to delete contents of %q directory: %v", etcdDataDir, err) + } } } else { fmt.Println("[reset] Would remove the etcd member on this node from the etcd cluster") + fmt.Printf("[reset] Would delete contents of the etcd data directory: %v\n", etcdDataDir) } } } else { diff --git a/cmd/kubeadm/app/cmd/reset.go b/cmd/kubeadm/app/cmd/reset.go index 407603d1def..7e7c285621d 100644 --- a/cmd/kubeadm/app/cmd/reset.go +++ b/cmd/kubeadm/app/cmd/reset.go @@ -78,7 +78,6 @@ type resetData struct { inputReader io.Reader outputWriter io.Writer cfg *kubeadmapi.InitConfiguration - dirsToClean []string dryRun bool } @@ -178,20 +177,11 @@ func newCmdReset(in io.Reader, out io.Writer, resetOptions *resetOptions) *cobra Use: "reset", Short: "Performs a best effort revert of changes made to this host by 'kubeadm init' or 'kubeadm join'", RunE: func(cmd *cobra.Command, args []string) error { - c, err := resetRunner.InitData(args) + err := resetRunner.Run(args) if err != nil { return err } - err = resetRunner.Run(args) - if err != nil { - return err - } - - // Then clean contents from the stateful kubelet, etcd and cni directories - data := c.(*resetData) - cleanDirs(data) - // output help text instructing user how to remove cni folders fmt.Print(cniCleanupInstructions) // Output help text instructing user how to remove iptables rules @@ -220,21 +210,6 @@ func newCmdReset(in io.Reader, out io.Writer, resetOptions *resetOptions) *cobra return cmd } -func cleanDirs(data *resetData) { - if data.DryRun() { - fmt.Printf("[reset] Would delete contents of stateful directories: %v\n", data.dirsToClean) - return - } - - fmt.Printf("[reset] Deleting contents of stateful directories: %v\n", data.dirsToClean) - for _, dir := range data.dirsToClean { - klog.V(1).Infof("[reset] Deleting contents of %s", dir) - if err := phases.CleanDir(dir); err != nil { - klog.Warningf("[reset] Failed to delete contents of %q directory: %v", dir, err) - } - } -} - // Cfg returns the InitConfiguration. func (r *resetData) Cfg() *kubeadmapi.InitConfiguration { return r.cfg @@ -270,11 +245,6 @@ func (r *resetData) IgnorePreflightErrors() sets.String { return r.ignorePreflightErrors } -// AddDirsToClean add a list of dirs to the list of dirs that will be removed. -func (r *resetData) AddDirsToClean(dirs ...string) { - r.dirsToClean = append(r.dirsToClean, dirs...) -} - // CRISocketPath returns the criSocketPath. func (r *resetData) CRISocketPath() string { return r.criSocketPath From 71ef1ea68df05cafaabd59d8cf486ace5baeebf5 Mon Sep 17 00:00:00 2001 From: Dave Chen Date: Tue, 5 Jul 2022 18:37:24 +0800 Subject: [PATCH 2/2] Cleanup etcd data dir on best effort basis Signed-off-by: Dave Chen --- .../app/cmd/phases/reset/cleanupnode.go | 14 +++++++++++ .../app/cmd/phases/reset/removeetcdmember.go | 25 +++++++++++++++++-- .../cmd/phases/reset/removeetcdmember_test.go | 5 ++-- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go b/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go index c00296fd4a6..26a40520b73 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go +++ b/cmd/kubeadm/app/cmd/phases/reset/cleanupnode.go @@ -19,6 +19,7 @@ package phases import ( "errors" "fmt" + "io" "os" "path/filepath" @@ -201,3 +202,16 @@ func CleanDir(filePath string) error { } return nil } + +func IsDirEmpty(dir string) (bool, error) { + d, err := os.Open(dir) + if err != nil { + return false, err + } + defer d.Close() + _, err = d.Readdirnames(1) + if err == io.EOF { + return true, nil + } + return false, nil +} diff --git a/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go b/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go index 6e854cdc616..579ff5be63a 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go +++ b/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember.go @@ -19,11 +19,14 @@ package phases import ( "errors" "fmt" + "os" "path/filepath" "k8s.io/klog/v2" kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" + "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/scheme" + "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3" "k8s.io/kubernetes/cmd/kubeadm/app/cmd/options" "k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow" kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants" @@ -63,7 +66,9 @@ func runRemoveETCDMemberPhase(c workflow.RunData) error { klog.Warningf("[reset] Failed to remove etcd member: %v, please manually remove this etcd member using etcdctl", err) } else { if err := CleanDir(etcdDataDir); err != nil { - klog.Warningf("[reset] Failed to delete contents of %q directory: %v", etcdDataDir, err) + klog.Warningf("[reset] Failed to delete contents of the etcd directory: %q, error: %v", etcdDataDir, err) + } else { + fmt.Printf("[reset] Deleted contents of the etcd data directory: %v\n", etcdDataDir) } } } else { @@ -71,6 +76,16 @@ func runRemoveETCDMemberPhase(c workflow.RunData) error { fmt.Printf("[reset] Would delete contents of the etcd data directory: %v\n", etcdDataDir) } } + // This could happen if the phase `cleanup-node` is run before the `remove-etcd-member`. + // Cleanup the data in the etcd data dir to avoid some stale files which might cause the failure to build cluster in the next time. + empty, _ := IsDirEmpty(etcdDataDir) + if !empty && !r.DryRun() { + if err := CleanDir(etcdDataDir); err != nil { + klog.Warningf("[reset] Failed to delete contents of the etcd directory: %q, error: %v", etcdDataDir, err) + } else { + fmt.Printf("[reset] Deleted contents of the etcd data directory: %v\n", etcdDataDir) + } + } } else { fmt.Println("[reset] No etcd config found. Assuming external etcd") fmt.Println("[reset] Please, manually reset etcd to prevent further issues") @@ -88,11 +103,17 @@ func getEtcdDataDir(manifestPath string, cfg *kubeadmapi.InitConfiguration) (str } klog.Warningln("[reset] No kubeadm config, using etcd pod spec to get data directory") + if _, err := os.Stat(manifestPath); os.IsNotExist(err) { + // Fall back to use the default cluster config if etcd.yaml doesn't exist, this could happen that + // etcd.yaml is removed by other reset phases, e.g. cleanup-node. + cfg := &v1beta3.ClusterConfiguration{} + scheme.Scheme.Default(cfg) + return cfg.Etcd.Local.DataDir, nil + } etcdPod, err := utilstaticpod.ReadStaticPodFromDisk(manifestPath) if err != nil { return "", err } - for _, volumeMount := range etcdPod.Spec.Volumes { if volumeMount.Name == etcdVolumeName { dataDir = volumeMount.HostPath.Path diff --git a/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember_test.go b/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember_test.go index 9407238c72c..27bce2a4ff2 100644 --- a/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember_test.go +++ b/cmd/kubeadm/app/cmd/phases/reset/removeetcdmember_test.go @@ -63,10 +63,11 @@ func TestGetEtcdDataDir(t *testing.T) { writeManifest bool validConfig bool }{ - "non-existent file returns error": { - expectErr: true, + "non-existent file returns default data dir": { + expectErr: false, writeManifest: false, validConfig: true, + dataDir: "/var/lib/etcd", }, "return etcd data dir": { dataDir: "/path/to/etcd",