Merge pull request #122807 from carlory/fix-121472

Fix AtomicWriter may not create user visible files after kubelet was restarted
This commit is contained in:
Kubernetes Prow Robot 2024-01-30 04:19:26 -08:00 committed by GitHub
commit fedb5842e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 120 additions and 44 deletions

View File

@ -101,9 +101,9 @@ const (
// portion of the payload was deleted and is still present on disk. // portion of the payload was deleted and is still present on disk.
// //
// 4. The data in the current timestamped directory is compared to the projected // 4. The data in the current timestamped directory is compared to the projected
// data to determine if an update is required. // data to determine if an update to data directory is required.
// //
// 5. A new timestamped dir is created. // 5. A new timestamped dir is created if an update is required.
// //
// 6. The payload is written to the new timestamped directory. // 6. The payload is written to the new timestamped directory.
// //
@ -159,6 +159,7 @@ func (w *AtomicWriter) Write(payload map[string]FileProjection, setPerms func(su
oldTsPath := filepath.Join(w.targetDir, oldTsDir) oldTsPath := filepath.Join(w.targetDir, oldTsDir)
var pathsToRemove sets.String var pathsToRemove sets.String
shouldWrite := true
// if there was no old version, there's nothing to remove // if there was no old version, there's nothing to remove
if len(oldTsDir) != 0 { if len(oldTsDir) != 0 {
// (3) // (3)
@ -173,57 +174,74 @@ func (w *AtomicWriter) Write(payload map[string]FileProjection, setPerms func(su
klog.Errorf("%s: error determining whether payload should be written to disk: %v", w.logContext, err) klog.Errorf("%s: error determining whether payload should be written to disk: %v", w.logContext, err)
return err return err
} else if !should && len(pathsToRemove) == 0 { } else if !should && len(pathsToRemove) == 0 {
klog.V(4).Infof("%s: no update required for target directory %v", w.logContext, w.targetDir) klog.V(4).Infof("%s: write not required for data directory %v", w.logContext, oldTsDir)
return nil // data directory is already up to date, but we need to make sure that
// the user-visible symlinks are created.
// See https://github.com/kubernetes/kubernetes/issues/121472 for more details.
// Reset oldTsDir to empty string to avoid removing the data directory.
shouldWrite = false
oldTsDir = ""
} else { } else {
klog.V(4).Infof("%s: write required for target directory %v", w.logContext, w.targetDir) klog.V(4).Infof("%s: write required for target directory %v", w.logContext, w.targetDir)
} }
} }
// (5) if shouldWrite {
tsDir, err := w.newTimestampDir() // (5)
if err != nil { tsDir, err := w.newTimestampDir()
klog.V(4).Infof("%s: error creating new ts data directory: %v", w.logContext, err) if err != nil {
return err klog.V(4).Infof("%s: error creating new ts data directory: %v", w.logContext, err)
}
tsDirName := filepath.Base(tsDir)
// (6)
if err = w.writePayloadToDir(cleanPayload, tsDir); err != nil {
klog.Errorf("%s: error writing payload to ts data directory %s: %v", w.logContext, tsDir, err)
return err
}
klog.V(4).Infof("%s: performed write of new data to ts data directory: %s", w.logContext, tsDir)
// (7)
if setPerms != nil {
if err := setPerms(tsDirName); err != nil {
klog.Errorf("%s: error applying ownership settings: %v", w.logContext, err)
return err return err
} }
} tsDirName := filepath.Base(tsDir)
// (8) // (6)
newDataDirPath := filepath.Join(w.targetDir, newDataDirName) if err = w.writePayloadToDir(cleanPayload, tsDir); err != nil {
if err = os.Symlink(tsDirName, newDataDirPath); err != nil { klog.Errorf("%s: error writing payload to ts data directory %s: %v", w.logContext, tsDir, err)
os.RemoveAll(tsDir) return err
klog.Errorf("%s: error creating symbolic link for atomic update: %v", w.logContext, err) }
return err klog.V(4).Infof("%s: performed write of new data to ts data directory: %s", w.logContext, tsDir)
}
// (9) // (7)
if runtime.GOOS == "windows" { if setPerms != nil {
os.Remove(dataDirPath) if err := setPerms(tsDirName); err != nil {
err = os.Symlink(tsDirName, dataDirPath) klog.Errorf("%s: error applying ownership settings: %v", w.logContext, err)
os.Remove(newDataDirPath) return err
} else { }
err = os.Rename(newDataDirPath, dataDirPath) }
}
if err != nil { // (8)
os.Remove(newDataDirPath) newDataDirPath := filepath.Join(w.targetDir, newDataDirName)
os.RemoveAll(tsDir) if err = os.Symlink(tsDirName, newDataDirPath); err != nil {
klog.Errorf("%s: error renaming symbolic link for data directory %s: %v", w.logContext, newDataDirPath, err) if err := os.RemoveAll(tsDir); err != nil {
return err klog.Errorf("%s: error removing new ts directory %s: %v", w.logContext, tsDir, err)
}
klog.Errorf("%s: error creating symbolic link for atomic update: %v", w.logContext, err)
return err
}
// (9)
if runtime.GOOS == "windows" {
if err := os.Remove(dataDirPath); err != nil {
klog.Errorf("%s: error removing data dir directory %s: %v", w.logContext, dataDirPath, err)
}
err = os.Symlink(tsDirName, dataDirPath)
if err := os.Remove(newDataDirPath); err != nil {
klog.Errorf("%s: error removing new data dir directory %s: %v", w.logContext, newDataDirPath, err)
}
} else {
err = os.Rename(newDataDirPath, dataDirPath)
}
if err != nil {
if err := os.Remove(newDataDirPath); err != nil && err != os.ErrNotExist {
klog.Errorf("%s: error removing new data dir directory %s: %v", w.logContext, newDataDirPath, err)
}
if err := os.RemoveAll(tsDir); err != nil {
klog.Errorf("%s: error removing new ts directory %s: %v", w.logContext, tsDir, err)
}
klog.Errorf("%s: error renaming symbolic link for data directory %s: %v", w.logContext, newDataDirPath, err)
return err
}
} }
// (10) // (10)

View File

@ -1035,3 +1035,61 @@ func TestSetPerms(t *testing.T) {
t.Fatalf("unexpected error while writing: %v", err) t.Fatalf("unexpected error while writing: %v", err)
} }
} }
func TestWriteAgainAfterUnexpectedExit(t *testing.T) {
testCases := []struct {
name string
payload map[string]FileProjection
simulateFn func(targetDir string, payload map[string]FileProjection) error
}{
{
name: "process killed before creating user visible files",
payload: map[string]FileProjection{
"foo": {Mode: 0644, Data: []byte("foo")},
"bar": {Mode: 0644, Data: []byte("bar")},
},
simulateFn: func(targetDir string, payload map[string]FileProjection) error {
for filename := range payload {
path := filepath.Join(targetDir, filename)
if err := os.RemoveAll(path); err != nil {
return err
}
}
return nil
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
targetDir, err := utiltesting.MkTmpdir("atomic-write")
if err != nil {
t.Fatalf("unexpected error creating tmp dir: %v", err)
}
defer func() {
err := os.RemoveAll(targetDir)
if err != nil {
t.Errorf("%v: unexpected error removing tmp dir: %v", tc.name, err)
}
}()
writer := &AtomicWriter{targetDir: targetDir, logContext: "-test-"}
err = writer.Write(tc.payload, nil)
if err != nil {
t.Fatalf("unexpected error writing payload: %v", err)
}
err = tc.simulateFn(targetDir, tc.payload)
if err != nil {
t.Fatalf("failed to simulate the unexpected exit: %v", err)
}
err = writer.Write(tc.payload, nil)
if err != nil {
t.Fatalf("unexpected error writing payload again: %v", err)
}
checkVolumeContents(targetDir, tc.name, tc.payload, t)
})
}
}