From b9b0cac1222f3df238f363e93873b2ff0a37e41d Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Tue, 29 Jun 2021 10:29:05 -0400 Subject: [PATCH] storagedriver/s3: Major change to the S3 Walk impl to infer directories to walk between files. This is needed for manifest enumeration among others --- registry/storage/driver/s3-aws/s3.go | 80 ++++++++++++++++------- registry/storage/driver/s3-aws/s3_test.go | 64 +++++++++--------- 2 files changed, 88 insertions(+), 56 deletions(-) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index d5f42318a..8029490e4 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1091,6 +1091,7 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre // the most recent skip directory to avoid walking over undesirable files prevSkipDir string ) + prevDir = prefix + path listObjectsInput := &s3.ListObjectsV2Input{ Bucket: aws.String(d.Bucket), @@ -1116,12 +1117,28 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre walkInfos := make([]storagedriver.FileInfoInternal, 0, len(objects.Contents)) for _, file := range objects.Contents { + filePath := strings.Replace(*file.Key, d.s3Path(""), prefix, 1) + + // get a list of all inferred directories skipped between the previous directory and this file + dirs := directoryDiff(prevDir, filePath) + if len(dirs) > 0 { + for _, dir := range dirs { + walkInfos = append(walkInfos, storagedriver.FileInfoInternal{ + FileInfoFields: storagedriver.FileInfoFields{ + IsDir: true, + Path: dir, + }, + }) + prevDir = dir + } + } + walkInfos = append(walkInfos, storagedriver.FileInfoInternal{ FileInfoFields: storagedriver.FileInfoFields{ IsDir: false, Size: *file.Size, ModTime: *file.LastModified, - Path: strings.Replace(*file.Key, d.s3Path(""), prefix, 1), + Path: filePath, }, }) } @@ -1132,35 +1149,16 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre continue } - // walk over file's parent directory if not a duplicate - dir := filepath.Dir(walkInfo.Path()) - if dir != prevDir { - prevDir = dir - walkDirInfo := storagedriver.FileInfoInternal{ - FileInfoFields: storagedriver.FileInfoFields{ - IsDir: true, - Path: dir, - }, - } - err := f(walkDirInfo) - *objectCount++ - - if err != nil { - if err == storagedriver.ErrSkipDir { - prevSkipDir = dir - continue - } - retError = err - return false - } - } - err := f(walkInfo) *objectCount++ if err != nil { if err == storagedriver.ErrSkipDir { - // stop early without return error + if walkInfo.IsDir() { + prevSkipDir = walkInfo.Path() + continue + } + // is file, stop gracefully return false } retError = err @@ -1181,6 +1179,38 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre return nil } +// directoryDiff finds all directories that are not in common between +// the previous and current paths in sorted order. +// +// Eg 1 directoryDiff("/path/to/folder", "/path/to/folder/folder/file") +// => [ "/path/to/folder/folder" ], +// Eg 2 directoryDiff("/path/to/folder/folder1", "/path/to/folder/folder2/file") +// => [ "/path/to/folder/folder2" ] +// Eg 3 directoryDiff("/path/to/folder/folder1/file", "/path/to/folder/folder2/file") +// => [ "/path/to/folder/folder2" ] +// Eg 4 directoryDiff("/path/to/folder/folder1/file", "/path/to/folder/folder2/folder1/file") +// => [ "/path/to/folder/folder2", "/path/to/folder/folder2/folder1" ] +// Eg 5 directoryDiff("/", "/path/to/folder/folder/file") +// => [ "/path", "/path/to", "/path/to/folder", "/path/to/folder/folder" ], +func directoryDiff(prev, current string) []string { + var parents []string + + if prev == "" || current == "" { + return parents + } + + parent := current + for { + parent = filepath.Dir(parent) + if parent == "/" || parent == prev || strings.HasPrefix(prev, parent) { + break + } + parents = append(parents, parent) + } + sort.Sort(sort.StringSlice(parents)) + return parents +} + func (d *driver) s3Path(path string) string { return strings.TrimLeft(strings.TrimRight(d.RootDirectory, "/")+path, "/") } diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index 390922761..4c80863a4 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -259,29 +259,24 @@ func TestWalk(t *testing.T) { t.Fatalf("unexpected error creating driver with standard storage: %v", err) } - var fileset = map[string][]string{ - "/": {"/file1", "/folder1", "/folder2"}, - "/folder1": {"/folder1/file1"}, - "/folder2": {"/folder2/file1"}, - } - isDir := func(path string) bool { - _, isDir := fileset[path] - return isDir + var fileset = []string{ + "/file1", + "/folder1/file1", + "/folder2/file1", + "/folder3/subfolder1/subfolder1/file1", + "/folder3/subfolder2/subfolder1/file1", + "/folder4/file1", } // create file structure matching fileset above var created []string - for _, paths := range fileset { - for _, path := range paths { - if _, isDir := fileset[path]; isDir { - continue // skip directories - } - err := driver.PutContent(context.Background(), path, []byte("content "+path)) - if err != nil { - fmt.Printf("unable to create file %s: %s\n", path, err) - } - created = append(created, path) + for _, path := range fileset { + err := driver.PutContent(context.Background(), path, []byte("content "+path)) + if err != nil { + fmt.Printf("unable to create file %s: %s\n", path, err) + continue } + created = append(created, path) } // cleanup @@ -310,32 +305,43 @@ func TestWalk(t *testing.T) { name: "walk all", fn: func(fileInfo storagedriver.FileInfo) error { return nil }, expected: []string{ - "/", "/file1", "/folder1", "/folder1/file1", "/folder2", "/folder2/file1", + "/folder3", + "/folder3/subfolder1", + "/folder3/subfolder1/subfolder1", + "/folder3/subfolder1/subfolder1/file1", + "/folder3/subfolder2", + "/folder3/subfolder2/subfolder1", + "/folder3/subfolder2/subfolder1/file1", + "/folder4", + "/folder4/file1", }, }, { name: "skip directory", fn: func(fileInfo storagedriver.FileInfo) error { - if fileInfo.Path() == "/folder1" { + if fileInfo.Path() == "/folder3" { return storagedriver.ErrSkipDir } - if strings.Contains(fileInfo.Path(), "/folder1") { - t.Fatalf("skipped dir %s and should not walk %s", "/folder1", fileInfo.Path()) + if strings.Contains(fileInfo.Path(), "/folder3") { + t.Fatalf("skipped dir %s and should not walk %s", "/folder3", fileInfo.Path()) } return nil }, expected: []string{ - "/", "/file1", - "/folder1", // return ErrSkipDir, skip anything under /folder1 - // skip /folder1/file1 + "/folder1", + "/folder1/file1", "/folder2", "/folder2/file1", + "/folder3", + // folder 3 contents skipped + "/folder4", + "/folder4/file1", }, }, { @@ -347,7 +353,6 @@ func TestWalk(t *testing.T) { return nil }, expected: []string{ - "/", "/file1", "/folder1", "/folder1/file1", @@ -361,7 +366,7 @@ func TestWalk(t *testing.T) { return errors.New("foo") }, expected: []string{ - "/", + "/file1", }, err: true, }, @@ -384,9 +389,6 @@ func TestWalk(t *testing.T) { t.Run(tc.name, func(t *testing.T) { err := driver.Walk(context.Background(), tc.from, func(fileInfo storagedriver.FileInfo) error { walked = append(walked, fileInfo.Path()) - if fileInfo.IsDir() != isDir(fileInfo.Path()) { - t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", isDir(fileInfo.Path()), fileInfo.IsDir()) - } return tc.fn(fileInfo) }) if tc.err && err == nil { @@ -493,7 +495,7 @@ func compareWalked(t *testing.T, expected, walked []string) { } for i := range walked { if walked[i] != expected[i] { - t.Fatalf("expected walked to come in order expected: walked %s", walked) + t.Fatalf("walked in unexpected order: expected %s; walked %s", expected, walked) } } }