From f31195b2e8bdcd89a4883f92baa01cace85b85f2 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Thu, 24 Jun 2021 14:42:02 -0400 Subject: [PATCH 01/22] storagedriver: Added a new WalkFiles method (later removed) --- registry/storage/driver/azure/azure.go | 8 +++- registry/storage/driver/base/base.go | 12 +++++ registry/storage/driver/filesystem/driver.go | 8 +++- registry/storage/driver/inmemory/driver.go | 8 +++- registry/storage/driver/s3-aws/s3.go | 48 ++++++++++++++++---- registry/storage/driver/storagedriver.go | 5 ++ registry/storage/driver/swift/swift.go | 8 +++- registry/storage/driver/walk.go | 38 ++++++++++++++++ 8 files changed, 123 insertions(+), 12 deletions(-) diff --git a/registry/storage/driver/azure/azure.go b/registry/storage/driver/azure/azure.go index 58c6293b2..c36fa7dfb 100644 --- a/registry/storage/driver/azure/azure.go +++ b/registry/storage/driver/azure/azure.go @@ -360,11 +360,17 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file +// from the given path, calling f on each file and directory func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkFiles traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFilesFallback(ctx, d, path, f) +} + // directDescendants will find direct descendants (blobs or virtual containers) // of from list of blob paths and will return their full paths. Elements in blobs // list must be prefixed with a "/" and diff --git a/registry/storage/driver/base/base.go b/registry/storage/driver/base/base.go index 3864eacf7..e2f65f270 100644 --- a/registry/storage/driver/base/base.go +++ b/registry/storage/driver/base/base.go @@ -241,3 +241,15 @@ func (base *Base) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return base.setDriverName(base.StorageDriver.Walk(ctx, path, f)) } + +// WalkFiles wraps WalkFiles of underlying storage driver. +func (base *Base) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { + ctx, done := dcontext.WithTrace(ctx) + defer done("%s.WalkFiles(%q)", base.Name(), path) + + if !storagedriver.PathRegexp.MatchString(path) && path != "/" { + return storagedriver.InvalidPathError{Path: path, DriverName: base.StorageDriver.Name()} + } + + return base.setDriverName(base.StorageDriver.WalkFiles(ctx, path, f)) +} diff --git a/registry/storage/driver/filesystem/driver.go b/registry/storage/driver/filesystem/driver.go index 8fc9d1ca0..b4613f455 100644 --- a/registry/storage/driver/filesystem/driver.go +++ b/registry/storage/driver/filesystem/driver.go @@ -290,11 +290,17 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file +// from the given path, calling f on each file and directory func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkFiles traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFilesFallback(ctx, d, path, f) +} + // fullPath returns the absolute path of a key within the Driver's storage. func (d *driver) fullPath(subPath string) string { return path.Join(d.rootDirectory, subPath) diff --git a/registry/storage/driver/inmemory/driver.go b/registry/storage/driver/inmemory/driver.go index 4b1f5f48d..0832bb238 100644 --- a/registry/storage/driver/inmemory/driver.go +++ b/registry/storage/driver/inmemory/driver.go @@ -245,11 +245,17 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file +// from the given path, calling f on each file and directory func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkFiles traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFilesFallback(ctx, d, path, f) +} + type writer struct { d *driver f *file diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 7ee0f79e2..5f8e7dc8f 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1057,7 +1057,7 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file +// from the given path, calling f on each file and directory func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error { path := from if !strings.HasSuffix(path, "/") { @@ -1070,7 +1070,33 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) } var objectCount int64 - if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, f); err != nil { + if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, true, f); err != nil { + return err + } + + // S3 doesn't have the concept of empty directories, so it'll return path not found if there are no objects + if objectCount == 0 { + return storagedriver.PathNotFoundError{Path: from} + } + + return nil +} + +// WalkFiles traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) WalkFiles(ctx context.Context, from string, f storagedriver.WalkFn) error { + path := from + if !strings.HasSuffix(path, "/") { + path = path + "/" + } + + prefix := "" + if d.s3Path("") == "" { + prefix = "/" + } + + var objectCount int64 + if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, false, f); err != nil { return err } @@ -1110,13 +1136,17 @@ func (wi walkInfoContainer) IsDir() bool { return wi.FileInfoFields.IsDir } -func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, f storagedriver.WalkFn) error { +func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, walkDirectories bool, f storagedriver.WalkFn) error { var retError error + delimiter := "" + if walkDirectories { + delimiter = "/" + } listObjectsInput := &s3.ListObjectsV2Input{ Bucket: aws.String(d.Bucket), Prefix: aws.String(path), - Delimiter: aws.String("/"), + Delimiter: aws.String(delimiter), MaxKeys: aws.Int64(listMax), } @@ -1180,10 +1210,12 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre return false } - if walkInfo.IsDir() { - if err := d.doWalk(ctx, objectCount, *walkInfo.prefix, prefix, f); err != nil { - retError = err - return false + if walkDirectories { + if walkInfo.IsDir() { + if err := d.doWalk(ctx, objectCount, *walkInfo.prefix, prefix, walkDirectories, f); err != nil { + retError = err + return false + } } } } diff --git a/registry/storage/driver/storagedriver.go b/registry/storage/driver/storagedriver.go index 340d10804..025b6c67d 100644 --- a/registry/storage/driver/storagedriver.go +++ b/registry/storage/driver/storagedriver.go @@ -90,6 +90,11 @@ type StorageDriver interface { // to a directory, the directory will not be entered and Walk // will continue the traversal. If fileInfo refers to a normal file, processing stops Walk(ctx context.Context, path string, f WalkFn) error + + // WalkFiles traverses a filesystem defined within driver, starting + // from the given path, calling f on each file but does not call f with directories. + // If an error is returned from the WalkFn, processing stops + WalkFiles(ctx context.Context, path string, f WalkFn) error } // FileWriter provides an abstraction for an opened writable file-like object in diff --git a/registry/storage/driver/swift/swift.go b/registry/storage/driver/swift/swift.go index b64e3b66c..a4bea57ea 100644 --- a/registry/storage/driver/swift/swift.go +++ b/registry/storage/driver/swift/swift.go @@ -658,11 +658,17 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file +// from the given path, calling f on each file and directory func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkFiles traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFilesFallback(ctx, d, path, f) +} + func (d *driver) swiftPath(path string) string { return strings.TrimLeft(strings.TrimRight(d.Prefix+"/files"+path, "/"), "/") } diff --git a/registry/storage/driver/walk.go b/registry/storage/driver/walk.go index 8ded5b8be..09e2d613b 100644 --- a/registry/storage/driver/walk.go +++ b/registry/storage/driver/walk.go @@ -59,3 +59,41 @@ func WalkFallback(ctx context.Context, driver StorageDriver, from string, f Walk } return nil } + +// WalkFilesFallback traverses a filesystem defined within driver, starting +// from the given path, calling f on each file. It uses the List method and Stat to drive itself. +// If an error is returned from WalkFn, processing stops +func WalkFilesFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { + children, err := driver.List(ctx, from) + if err != nil { + return err + } + sort.Stable(sort.StringSlice(children)) + for _, child := range children { + // TODO(stevvooe): Calling driver.Stat for every entry is quite + // expensive when running against backends with a slow Stat + // implementation, such as s3. This is very likely a serious + // performance bottleneck. + fileInfo, err := driver.Stat(ctx, child) + if err != nil { + switch err.(type) { + case PathNotFoundError: + // repository was removed in between listing and enumeration. Ignore it. + logrus.WithField("path", child).Infof("ignoring deleted path") + continue + default: + return err + } + } + if !fileInfo.IsDir() { + err = f(fileInfo) + if err != nil { + return err + } + } + if err := WalkFallback(ctx, driver, child, f); err != nil { + return err + } + } + return nil +} From c83e9ae10af5586f88739e7de68779eb674dfc9a Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Thu, 24 Jun 2021 14:48:20 -0400 Subject: [PATCH 02/22] Replaced usage of storagedriver.Walk with storagedriver.WalkFiles for linked/blobstore --- registry/storage/blobstore.go | 7 +------ registry/storage/linkedblobstore.go | 6 +----- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/registry/storage/blobstore.go b/registry/storage/blobstore.go index 1008aad8c..ee7a03f0c 100644 --- a/registry/storage/blobstore.go +++ b/registry/storage/blobstore.go @@ -93,12 +93,7 @@ func (bs *blobStore) Enumerate(ctx context.Context, ingester func(dgst digest.Di return err } - return bs.driver.Walk(ctx, specPath, func(fileInfo driver.FileInfo) error { - // skip directories - if fileInfo.IsDir() { - return nil - } - + return bs.driver.WalkFiles(ctx, specPath, func(fileInfo driver.FileInfo) error { currentPath := fileInfo.Path() // we only want to parse paths that end with /data _, fileName := path.Split(currentPath) diff --git a/registry/storage/linkedblobstore.go b/registry/storage/linkedblobstore.go index 3e3fffe19..55992d00d 100644 --- a/registry/storage/linkedblobstore.go +++ b/registry/storage/linkedblobstore.go @@ -247,11 +247,7 @@ func (lbs *linkedBlobStore) Enumerate(ctx context.Context, ingestor func(digest. if err != nil { return err } - return lbs.driver.Walk(ctx, rootPath, func(fileInfo driver.FileInfo) error { - // exit early if directory... - if fileInfo.IsDir() { - return nil - } + return lbs.driver.WalkFiles(ctx, rootPath, func(fileInfo driver.FileInfo) error { filePath := fileInfo.Path() // check if it's a link From 48e2373d5b18ed23cac9911beb1c45b931442d91 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Thu, 24 Jun 2021 17:47:54 -0400 Subject: [PATCH 03/22] storagedriver/s3: Simplified conditional in Walk impl --- registry/storage/driver/s3-aws/s3.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 5f8e7dc8f..2c0b9d08d 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1210,12 +1210,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre return false } - if walkDirectories { - if walkInfo.IsDir() { - if err := d.doWalk(ctx, objectCount, *walkInfo.prefix, prefix, walkDirectories, f); err != nil { - retError = err - return false - } + if walkInfo.IsDir() && walkDirectories { + if err := d.doWalk(ctx, objectCount, *walkInfo.prefix, prefix, walkDirectories, f); err != nil { + retError = err + return false } } } From cff975b29f5c5d0ba9d5b584642292eccb4ad512 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Fri, 25 Jun 2021 16:56:27 -0400 Subject: [PATCH 04/22] Fixed the fallback implementation of WalkFilesFallback that wasn't working & added a Files Removed test for WalkFilesFallback. --- registry/storage/driver/walk.go | 61 +++++++++------------------- registry/storage/driver/walk_test.go | 20 +++++++++ 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/registry/storage/driver/walk.go b/registry/storage/driver/walk.go index 09e2d613b..23fc8800c 100644 --- a/registry/storage/driver/walk.go +++ b/registry/storage/driver/walk.go @@ -22,6 +22,20 @@ type WalkFn func(fileInfo FileInfo) error // to a directory, the directory will not be entered and Walk // will continue the traversal. If fileInfo refers to a normal file, processing stops func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { + return doWalk(ctx, driver, from, true, f) +} + +// WalkFilesFallback traverses a filesystem defined within driver, starting +// from the given path, calling f on each file. It uses the List method and Stat to drive itself. +// If an error is returned from WalkFn, processing stops +func WalkFilesFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { + return doWalk(ctx, driver, from, false, f) +} + +// WalkFilesFallback traverses a filesystem defined within driver, starting +// from the given path, calling f on each file. It uses the List method and Stat to drive itself. +// If an error is returned from WalkFn, processing stops +func doWalk(ctx context.Context, driver StorageDriver, from string, walkDir bool, f WalkFn) error { children, err := driver.List(ctx, from) if err != nil { return err @@ -43,12 +57,15 @@ func WalkFallback(ctx context.Context, driver StorageDriver, from string, f Walk return err } } - err = f(fileInfo) + err = nil + if !fileInfo.IsDir() || walkDir { + err = f(fileInfo) + } if err == nil && fileInfo.IsDir() { - if err := WalkFallback(ctx, driver, child, f); err != nil { + if err := doWalk(ctx, driver, child, walkDir, f); err != nil { return err } - } else if err == ErrSkipDir { + } else if err == ErrSkipDir && walkDir { // Stop iteration if it's a file, otherwise noop if it's a directory if !fileInfo.IsDir() { return nil @@ -59,41 +76,3 @@ func WalkFallback(ctx context.Context, driver StorageDriver, from string, f Walk } return nil } - -// WalkFilesFallback traverses a filesystem defined within driver, starting -// from the given path, calling f on each file. It uses the List method and Stat to drive itself. -// If an error is returned from WalkFn, processing stops -func WalkFilesFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { - children, err := driver.List(ctx, from) - if err != nil { - return err - } - sort.Stable(sort.StringSlice(children)) - for _, child := range children { - // TODO(stevvooe): Calling driver.Stat for every entry is quite - // expensive when running against backends with a slow Stat - // implementation, such as s3. This is very likely a serious - // performance bottleneck. - fileInfo, err := driver.Stat(ctx, child) - if err != nil { - switch err.(type) { - case PathNotFoundError: - // repository was removed in between listing and enumeration. Ignore it. - logrus.WithField("path", child).Infof("ignoring deleted path") - continue - default: - return err - } - } - if !fileInfo.IsDir() { - err = f(fileInfo) - if err != nil { - return err - } - } - if err := WalkFallback(ctx, driver, child, f); err != nil { - return err - } - } - return nil -} diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index d6e9beb64..c2627be5d 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -45,3 +45,23 @@ func TestWalkFileRemoved(t *testing.T) { t.Fatalf(err.Error()) } } + +func TestWalkFilesFileRemoved(t *testing.T) { + d := &changingFileSystem{ + fileset: []string{"zoidberg", "bender"}, + keptFiles: map[string]bool{ + "zoidberg": true, + }, + } + infos := []FileInfo{} + err := WalkFilesFallback(context.Background(), d, "", func(fileInfo FileInfo) error { + infos = append(infos, fileInfo) + return nil + }) + if len(infos) != 1 || infos[0].Path() != "zoidberg" { + t.Errorf(fmt.Sprintf("unexpected path set during walk: %s", infos)) + } + if err != nil { + t.Fatalf(err.Error()) + } +} From 88420f2c7c8b0d5bb86f40159639a40866527861 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Fri, 25 Jun 2021 18:02:41 -0400 Subject: [PATCH 05/22] Added unit testing for storage driver WalkFallback and WalkFilesFallback --- registry/storage/driver/walk_test.go | 193 +++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index c2627be5d..17de5f0a2 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -3,6 +3,7 @@ package driver import ( "context" "fmt" + "strings" "testing" ) @@ -26,6 +27,31 @@ func (cfs *changingFileSystem) Stat(ctx context.Context, path string) (FileInfo, } return nil, PathNotFoundError{} } + +type fileSystem struct { + StorageDriver + fileset map[string][]string +} + +func (cfs *fileSystem) List(_ context.Context, path string) ([]string, error) { + return cfs.fileset[path], nil +} + +func (cfs *fileSystem) Stat(_ context.Context, path string) (FileInfo, error) { + _, isDir := cfs.fileset[path] + return &FileInfoInternal{ + FileInfoFields: FileInfoFields{ + Path: path, + IsDir: isDir, + Size: int64(len(path)), + }, + }, nil +} +func (cfs *fileSystem) isDir(path string) bool { + _, isDir := cfs.fileset[path] + return isDir +} + func TestWalkFileRemoved(t *testing.T) { d := &changingFileSystem{ fileset: []string{"zoidberg", "bender"}, @@ -65,3 +91,170 @@ func TestWalkFilesFileRemoved(t *testing.T) { t.Fatalf(err.Error()) } } + +func TestWalkFallback(t *testing.T) { + d := &fileSystem{ + fileset: map[string][]string{ + "/": {"/a", "/b", "/c"}, + "/a": {"/a/1"}, + "/b": {"/b/1", "/b/2"}, + "/c": {"/c/1", "/c/2"}, + "/a/1": {"/a/1/a", "/a/1/b"}, + }, + } + var expected int + for _, list := range d.fileset { + expected += len(list) + } + + var walked []FileInfo + err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { + if fileInfo.IsDir() != d.isDir(fileInfo.Path()) { + t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir()) + } + walked = append(walked, fileInfo) + return nil + }) + if err != nil { + t.Fatalf(err.Error()) + } + if expected != len(walked) { + t.Fatalf("mismatch number of fileInfo walked, expected %d", expected) + } +} + +// Walk is expected to skip directory on ErrSkipDir +func TestWalkFallbackSkipDirOnDir(t *testing.T) { + d := &fileSystem{ + fileset: map[string][]string{ + "/": {"/file1", "/folder1", "/folder2"}, + "/folder1": {"/folder1/file1"}, // should not be walked + "/folder2": {"/folder2/file1"}, + }, + } + skipDir := "/folder1" + expected := []string{ + "/file1", + "/folder1", // return ErrSkipDir, skip anything under /folder1 + // skip /folder1/file1 + "/folder2", + "/folder2/file1", + } + + var walked []string + err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { + walked = append(walked, fileInfo.Path()) + if fileInfo.Path() == skipDir { + return ErrSkipDir + } + if strings.Contains(fileInfo.Path(), skipDir) { + t.Fatalf("skipped dir %s and should not walk %s", skipDir, fileInfo.Path()) + } + return nil + }) + if err != nil { + t.Fatalf("expected Walk to not error %v", err) + } + compareWalked(t, expected, walked) +} + +func TestWalkFallbackSkipDirOnFile(t *testing.T) { + d := &fileSystem{ + fileset: map[string][]string{ + "/": {"/file1", "/file2", "/file3"}, + }, + } + skipFile := "/file2" + expected := []string{ + "/file1", + "/file2", // return ErrSkipDir, stop early + } + + var walked []string + err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { + walked = append(walked, fileInfo.Path()) + if fileInfo.Path() == skipFile { + return ErrSkipDir + } + return nil + }) + if err != nil { + t.Fatalf("expected Walk to not error %v", err) + } + compareWalked(t, expected, walked) +} + +// WalkFiles is expected to only walk files, not directories +func TestWalkFilesFallback(t *testing.T) { + d := &fileSystem{ + fileset: map[string][]string{ + "/": {"/folder1", "/file1", "/folder2"}, + "/folder1": {"/folder1/folder1"}, + "/folder2": {"/folder2/file1", "/folder2/file2"}, + "/folder1/folder1": {"/folder1/folder1/file1", "/folder1/folder1/file2"}, + }, + } + expected := []string{ + "/file1", + "/folder1/folder1/file1", + "/folder1/folder1/file2", + "/folder2/file1", + "/folder2/file2", + } + + var walked []string + err := WalkFilesFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { + if fileInfo.IsDir() { + t.Fatalf("can't walk over dir %s", fileInfo.Path()) + } + if fileInfo.IsDir() != d.isDir(fileInfo.Path()) { + t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir()) + } + walked = append(walked, fileInfo.Path()) + return nil + }) + if err != nil { + t.Fatalf(err.Error()) + } + compareWalked(t, expected, walked) +} + +// WalkFiles is expected to stop when any error is given +func TestWalkFilesFallbackSkipDir(t *testing.T) { + d := &fileSystem{ + fileset: map[string][]string{ + "/": {"/file1", "/folder1", "/folder2"}, + "/folder1": {"/folder1/file1"}, + "/folder2": {"/folder2/file1"}, + }, + } + skipFile := "/folder1/file1" + expected := []string{ + "/file1", "/folder1/file1", + } + + var walked []string + err := WalkFilesFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { + fmt.Println("Walk ", fileInfo.Path()) + walked = append(walked, fileInfo.Path()) + if fileInfo.Path() == skipFile { + return ErrSkipDir + } + return nil + }) + if err == nil { + t.Fatalf("expected Walk to ErrSkipDir %v", err) + } + compareWalked(t, expected, walked) +} + +func compareWalked(t *testing.T, expected, walked []string) { + if len(walked) != len(expected) { + t.Fatalf("Mismatch number of fileInfo walked %d expected %d", len(walked), len(expected)) + } + for i := range walked { + if walked[i] != expected[i] { + t.Fatalf("expected walked to come in order expected: walked %s", walked) + } + } +} From a08cc38c057d0d9372d84776c41b386a5667e883 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Fri, 25 Jun 2021 18:08:11 -0400 Subject: [PATCH 06/22] storagedriver/s3: additional tests --- registry/storage/driver/walk_test.go | 33 ++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index 17de5f0a2..eb4501286 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -2,6 +2,7 @@ package driver import ( "context" + "errors" "fmt" "strings" "testing" @@ -128,7 +129,7 @@ func TestWalkFallbackSkipDirOnDir(t *testing.T) { d := &fileSystem{ fileset: map[string][]string{ "/": {"/file1", "/folder1", "/folder2"}, - "/folder1": {"/folder1/file1"}, // should not be walked + "/folder1": {"/folder1/file1"}, "/folder2": {"/folder2/file1"}, }, } @@ -184,6 +185,34 @@ func TestWalkFallbackSkipDirOnFile(t *testing.T) { compareWalked(t, expected, walked) } +// Walk is expected to skip directory on ErrSkipDir +func TestWalkFallbackErr(t *testing.T) { + d := &fileSystem{ + fileset: map[string][]string{ + "/": {"/file1", "/file2", "/file3"}, + }, + } + errFile := "/file2" + expected := []string{ + "/file1", + "/file2", // return ErrSkipDir, stop early + } + expectedErr := errors.New("foo") + + var walked []string + err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { + walked = append(walked, fileInfo.Path()) + if fileInfo.Path() == errFile { + return expectedErr + } + return nil + }) + if err != expectedErr { + t.Fatalf("unexpected err %v", err) + } + compareWalked(t, expected, walked) +} + // WalkFiles is expected to only walk files, not directories func TestWalkFilesFallback(t *testing.T) { d := &fileSystem{ @@ -220,7 +249,7 @@ func TestWalkFilesFallback(t *testing.T) { } // WalkFiles is expected to stop when any error is given -func TestWalkFilesFallbackSkipDir(t *testing.T) { +func TestWalkFilesFallbackErr(t *testing.T) { d := &fileSystem{ fileset: map[string][]string{ "/": {"/file1", "/folder1", "/folder2"}, From c98c694d51cf09604729521ea986d6971136b0da Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Fri, 25 Jun 2021 18:11:46 -0400 Subject: [PATCH 07/22] storagedriver/s3: refining tests --- registry/storage/driver/walk_test.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index eb4501286..4bbca1538 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -96,32 +96,31 @@ func TestWalkFilesFileRemoved(t *testing.T) { func TestWalkFallback(t *testing.T) { d := &fileSystem{ fileset: map[string][]string{ - "/": {"/a", "/b", "/c"}, - "/a": {"/a/1"}, - "/b": {"/b/1", "/b/2"}, - "/c": {"/c/1", "/c/2"}, - "/a/1": {"/a/1/a", "/a/1/b"}, + "/": {"/file1", "/folder1", "/folder2"}, + "/folder1": {"/folder1/file1"}, + "/folder2": {"/folder2/file1"}, }, } - var expected int - for _, list := range d.fileset { - expected += len(list) + expected := []string{ + "/file1", + "/folder1", // return ErrSkipDir, skip anything under /folder1 + "/folder1/file1", + "/folder2", + "/folder2/file1", } - var walked []FileInfo + var walked []string err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { if fileInfo.IsDir() != d.isDir(fileInfo.Path()) { t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir()) } - walked = append(walked, fileInfo) + walked = append(walked, fileInfo.Path()) return nil }) if err != nil { t.Fatalf(err.Error()) } - if expected != len(walked) { - t.Fatalf("mismatch number of fileInfo walked, expected %d", expected) - } + compareWalked(t, expected, walked) } // Walk is expected to skip directory on ErrSkipDir From 1c3ee66061892091d5e9bae65ef42820d47d2396 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Fri, 25 Jun 2021 18:12:34 -0400 Subject: [PATCH 08/22] storagedriver/s3: continue refining tests --- registry/storage/driver/walk_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index 4bbca1538..36d9c0f48 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -103,7 +103,7 @@ func TestWalkFallback(t *testing.T) { } expected := []string{ "/file1", - "/folder1", // return ErrSkipDir, skip anything under /folder1 + "/folder1", "/folder1/file1", "/folder2", "/folder2/file1", From e20be1ead5dff36c70d0013c7237daf8e4685f2f Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 12:17:40 -0400 Subject: [PATCH 09/22] storagedriver/s3: Reverting WalkFiles method. Instead, optimizing the Walk method. --- registry/storage/blobstore.go | 2 +- registry/storage/driver/azure/azure.go | 6 - registry/storage/driver/base/base.go | 12 -- registry/storage/driver/filesystem/driver.go | 6 - registry/storage/driver/inmemory/driver.go | 6 - registry/storage/driver/s3-aws/s3.go | 156 ++++++------------- registry/storage/driver/storagedriver.go | 5 - registry/storage/driver/swift/swift.go | 6 - registry/storage/driver/walk.go | 23 +-- registry/storage/driver/walk_test.go | 84 ---------- registry/storage/linkedblobstore.go | 2 +- 11 files changed, 52 insertions(+), 256 deletions(-) diff --git a/registry/storage/blobstore.go b/registry/storage/blobstore.go index ee7a03f0c..d8db5b417 100644 --- a/registry/storage/blobstore.go +++ b/registry/storage/blobstore.go @@ -93,7 +93,7 @@ func (bs *blobStore) Enumerate(ctx context.Context, ingester func(dgst digest.Di return err } - return bs.driver.WalkFiles(ctx, specPath, func(fileInfo driver.FileInfo) error { + return bs.driver.Walk(ctx, specPath, func(fileInfo driver.FileInfo) error { currentPath := fileInfo.Path() // we only want to parse paths that end with /data _, fileName := path.Split(currentPath) diff --git a/registry/storage/driver/azure/azure.go b/registry/storage/driver/azure/azure.go index c36fa7dfb..f7866825c 100644 --- a/registry/storage/driver/azure/azure.go +++ b/registry/storage/driver/azure/azure.go @@ -365,12 +365,6 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } -// WalkFiles traverses a filesystem defined within driver, starting -// from the given path, calling f on each file -func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { - return storagedriver.WalkFilesFallback(ctx, d, path, f) -} - // directDescendants will find direct descendants (blobs or virtual containers) // of from list of blob paths and will return their full paths. Elements in blobs // list must be prefixed with a "/" and diff --git a/registry/storage/driver/base/base.go b/registry/storage/driver/base/base.go index e2f65f270..3864eacf7 100644 --- a/registry/storage/driver/base/base.go +++ b/registry/storage/driver/base/base.go @@ -241,15 +241,3 @@ func (base *Base) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return base.setDriverName(base.StorageDriver.Walk(ctx, path, f)) } - -// WalkFiles wraps WalkFiles of underlying storage driver. -func (base *Base) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { - ctx, done := dcontext.WithTrace(ctx) - defer done("%s.WalkFiles(%q)", base.Name(), path) - - if !storagedriver.PathRegexp.MatchString(path) && path != "/" { - return storagedriver.InvalidPathError{Path: path, DriverName: base.StorageDriver.Name()} - } - - return base.setDriverName(base.StorageDriver.WalkFiles(ctx, path, f)) -} diff --git a/registry/storage/driver/filesystem/driver.go b/registry/storage/driver/filesystem/driver.go index b4613f455..73d379656 100644 --- a/registry/storage/driver/filesystem/driver.go +++ b/registry/storage/driver/filesystem/driver.go @@ -295,12 +295,6 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } -// WalkFiles traverses a filesystem defined within driver, starting -// from the given path, calling f on each file -func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { - return storagedriver.WalkFilesFallback(ctx, d, path, f) -} - // fullPath returns the absolute path of a key within the Driver's storage. func (d *driver) fullPath(subPath string) string { return path.Join(d.rootDirectory, subPath) diff --git a/registry/storage/driver/inmemory/driver.go b/registry/storage/driver/inmemory/driver.go index 0832bb238..b875ebe48 100644 --- a/registry/storage/driver/inmemory/driver.go +++ b/registry/storage/driver/inmemory/driver.go @@ -250,12 +250,6 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } -// WalkFiles traverses a filesystem defined within driver, starting -// from the given path, calling f on each file -func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { - return storagedriver.WalkFilesFallback(ctx, d, path, f) -} - type writer struct { d *driver f *file diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 2c0b9d08d..3d2c6010d 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "math" "net/http" + "path/filepath" "reflect" "sort" "strconv" @@ -1057,7 +1058,7 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file and directory +// from the given path, calling f on each file func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error { path := from if !strings.HasSuffix(path, "/") { @@ -1070,7 +1071,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) } var objectCount int64 - if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, true, f); err != nil { + if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, f); err != nil { return err } @@ -1082,72 +1083,19 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) return nil } -// WalkFiles traverses a filesystem defined within driver, starting -// from the given path, calling f on each file -func (d *driver) WalkFiles(ctx context.Context, from string, f storagedriver.WalkFn) error { - path := from - if !strings.HasSuffix(path, "/") { - path = path + "/" - } +func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, f storagedriver.WalkFn) error { + var ( + retError error + // the most recent directory walked for de-duping + prevDir string + // the most recent skip directory to avoid walking over undesirable files + prevSkipDir string + ) - prefix := "" - if d.s3Path("") == "" { - prefix = "/" - } - - var objectCount int64 - if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, false, f); err != nil { - return err - } - - // S3 doesn't have the concept of empty directories, so it'll return path not found if there are no objects - if objectCount == 0 { - return storagedriver.PathNotFoundError{Path: from} - } - - return nil -} - -type walkInfoContainer struct { - storagedriver.FileInfoFields - prefix *string -} - -// Path provides the full path of the target of this file info. -func (wi walkInfoContainer) Path() string { - return wi.FileInfoFields.Path -} - -// Size returns current length in bytes of the file. The return value can -// be used to write to the end of the file at path. The value is -// meaningless if IsDir returns true. -func (wi walkInfoContainer) Size() int64 { - return wi.FileInfoFields.Size -} - -// ModTime returns the modification time for the file. For backends that -// don't have a modification time, the creation time should be returned. -func (wi walkInfoContainer) ModTime() time.Time { - return wi.FileInfoFields.ModTime -} - -// IsDir returns true if the path is a directory. -func (wi walkInfoContainer) IsDir() bool { - return wi.FileInfoFields.IsDir -} - -func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, walkDirectories bool, f storagedriver.WalkFn) error { - var retError error - - delimiter := "" - if walkDirectories { - delimiter = "/" - } listObjectsInput := &s3.ListObjectsV2Input{ - Bucket: aws.String(d.Bucket), - Prefix: aws.String(path), - Delimiter: aws.String(delimiter), - MaxKeys: aws.Int64(listMax), + Bucket: aws.String(d.Bucket), + Prefix: aws.String(path), + MaxKeys: aws.Int64(listMax), } s := d.s3Client(parentCtx) @@ -1155,36 +1103,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre ctx, done := dcontext.WithTrace(parentCtx) defer done("s3aws.ListObjectsV2Pages(%s)", path) listObjectErr := s.ListObjectsV2PagesWithContext(ctx, listObjectsInput, func(objects *s3.ListObjectsV2Output, lastPage bool) bool { - - var count int64 - // KeyCount was introduced with version 2 of the GET Bucket operation in S3. - // Some s3 implementations (looking at you ceph/rgw) have a buggy - // implementation so we intionally avoid ever using it, preferring instead - // to calculate the count from the Contents and CommonPrefixes fields of - // the s3.ListObjectsV2Output. we retain the commented out KeyCount code - // and this comment so as not to forget this problem moving forward. - // - // count = *objects.KeyCount - // *objectCount += *objects.KeyCount - - count = int64(len(objects.Contents) + len(objects.CommonPrefixes)) - *objectCount += count - - walkInfos := make([]walkInfoContainer, 0, count) - - for _, dir := range objects.CommonPrefixes { - commonPrefix := *dir.Prefix - walkInfos = append(walkInfos, walkInfoContainer{ - prefix: dir.Prefix, - FileInfoFields: storagedriver.FileInfoFields{ - IsDir: true, - Path: strings.Replace(commonPrefix[:len(commonPrefix)-1], d.s3Path(""), prefix, 1), - }, - }) - } + walkInfos := make([]storagedriver.FileInfoInternal, 0, len(objects.Contents)) for _, file := range objects.Contents { - walkInfos = append(walkInfos, walkInfoContainer{ + walkInfos = append(walkInfos, storagedriver.FileInfoInternal{ FileInfoFields: storagedriver.FileInfoFields{ IsDir: false, Size: *file.Size, @@ -1194,28 +1116,44 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre }) } - sort.SliceStable(walkInfos, func(i, j int) bool { return walkInfos[i].FileInfoFields.Path < walkInfos[j].FileInfoFields.Path }) - for _, walkInfo := range walkInfos { - err := f(walkInfo) - - if err == storagedriver.ErrSkipDir { - if walkInfo.IsDir() { - continue - } else { - break - } - } else if err != nil { - retError = err - return false + // skip any results under the last skip directory + if prevSkipDir != "" && strings.HasPrefix(walkInfo.Path(), prevSkipDir) { + continue } - if walkInfo.IsDir() && walkDirectories { - if err := d.doWalk(ctx, objectCount, *walkInfo.prefix, prefix, walkDirectories, f); err != nil { + dir := filepath.Dir(walkInfo.Path()) + if dir != prevDir { + prevDir = dir + walkDirInfo := storagedriver.FileInfoInternal{ + FileInfoFields: storagedriver.FileInfoFields{ + IsDir: true, + Path: dir, + }, + } + err := f(walkDirInfo) + *objectCount++ + + if err != nil { + if err == storagedriver.ErrSkipDir { + prevSkipDir = dir + continue + } retError = err return false } } + + err := f(walkInfo) + *objectCount++ + + if err != nil { + if err == storagedriver.ErrSkipDir { + break + } + retError = err + return false + } } return true }) diff --git a/registry/storage/driver/storagedriver.go b/registry/storage/driver/storagedriver.go index 025b6c67d..340d10804 100644 --- a/registry/storage/driver/storagedriver.go +++ b/registry/storage/driver/storagedriver.go @@ -90,11 +90,6 @@ type StorageDriver interface { // to a directory, the directory will not be entered and Walk // will continue the traversal. If fileInfo refers to a normal file, processing stops Walk(ctx context.Context, path string, f WalkFn) error - - // WalkFiles traverses a filesystem defined within driver, starting - // from the given path, calling f on each file but does not call f with directories. - // If an error is returned from the WalkFn, processing stops - WalkFiles(ctx context.Context, path string, f WalkFn) error } // FileWriter provides an abstraction for an opened writable file-like object in diff --git a/registry/storage/driver/swift/swift.go b/registry/storage/driver/swift/swift.go index a4bea57ea..89ae69af4 100644 --- a/registry/storage/driver/swift/swift.go +++ b/registry/storage/driver/swift/swift.go @@ -663,12 +663,6 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } -// WalkFiles traverses a filesystem defined within driver, starting -// from the given path, calling f on each file -func (d *driver) WalkFiles(ctx context.Context, path string, f storagedriver.WalkFn) error { - return storagedriver.WalkFilesFallback(ctx, d, path, f) -} - func (d *driver) swiftPath(path string) string { return strings.TrimLeft(strings.TrimRight(d.Prefix+"/files"+path, "/"), "/") } diff --git a/registry/storage/driver/walk.go b/registry/storage/driver/walk.go index 23fc8800c..8ded5b8be 100644 --- a/registry/storage/driver/walk.go +++ b/registry/storage/driver/walk.go @@ -22,20 +22,6 @@ type WalkFn func(fileInfo FileInfo) error // to a directory, the directory will not be entered and Walk // will continue the traversal. If fileInfo refers to a normal file, processing stops func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { - return doWalk(ctx, driver, from, true, f) -} - -// WalkFilesFallback traverses a filesystem defined within driver, starting -// from the given path, calling f on each file. It uses the List method and Stat to drive itself. -// If an error is returned from WalkFn, processing stops -func WalkFilesFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { - return doWalk(ctx, driver, from, false, f) -} - -// WalkFilesFallback traverses a filesystem defined within driver, starting -// from the given path, calling f on each file. It uses the List method and Stat to drive itself. -// If an error is returned from WalkFn, processing stops -func doWalk(ctx context.Context, driver StorageDriver, from string, walkDir bool, f WalkFn) error { children, err := driver.List(ctx, from) if err != nil { return err @@ -57,15 +43,12 @@ func doWalk(ctx context.Context, driver StorageDriver, from string, walkDir bool return err } } - err = nil - if !fileInfo.IsDir() || walkDir { - err = f(fileInfo) - } + err = f(fileInfo) if err == nil && fileInfo.IsDir() { - if err := doWalk(ctx, driver, child, walkDir, f); err != nil { + if err := WalkFallback(ctx, driver, child, f); err != nil { return err } - } else if err == ErrSkipDir && walkDir { + } else if err == ErrSkipDir { // Stop iteration if it's a file, otherwise noop if it's a directory if !fileInfo.IsDir() { return nil diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index 36d9c0f48..c4abd9d06 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -73,26 +73,6 @@ func TestWalkFileRemoved(t *testing.T) { } } -func TestWalkFilesFileRemoved(t *testing.T) { - d := &changingFileSystem{ - fileset: []string{"zoidberg", "bender"}, - keptFiles: map[string]bool{ - "zoidberg": true, - }, - } - infos := []FileInfo{} - err := WalkFilesFallback(context.Background(), d, "", func(fileInfo FileInfo) error { - infos = append(infos, fileInfo) - return nil - }) - if len(infos) != 1 || infos[0].Path() != "zoidberg" { - t.Errorf(fmt.Sprintf("unexpected path set during walk: %s", infos)) - } - if err != nil { - t.Fatalf(err.Error()) - } -} - func TestWalkFallback(t *testing.T) { d := &fileSystem{ fileset: map[string][]string{ @@ -212,70 +192,6 @@ func TestWalkFallbackErr(t *testing.T) { compareWalked(t, expected, walked) } -// WalkFiles is expected to only walk files, not directories -func TestWalkFilesFallback(t *testing.T) { - d := &fileSystem{ - fileset: map[string][]string{ - "/": {"/folder1", "/file1", "/folder2"}, - "/folder1": {"/folder1/folder1"}, - "/folder2": {"/folder2/file1", "/folder2/file2"}, - "/folder1/folder1": {"/folder1/folder1/file1", "/folder1/folder1/file2"}, - }, - } - expected := []string{ - "/file1", - "/folder1/folder1/file1", - "/folder1/folder1/file2", - "/folder2/file1", - "/folder2/file2", - } - - var walked []string - err := WalkFilesFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { - if fileInfo.IsDir() { - t.Fatalf("can't walk over dir %s", fileInfo.Path()) - } - if fileInfo.IsDir() != d.isDir(fileInfo.Path()) { - t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir()) - } - walked = append(walked, fileInfo.Path()) - return nil - }) - if err != nil { - t.Fatalf(err.Error()) - } - compareWalked(t, expected, walked) -} - -// WalkFiles is expected to stop when any error is given -func TestWalkFilesFallbackErr(t *testing.T) { - d := &fileSystem{ - fileset: map[string][]string{ - "/": {"/file1", "/folder1", "/folder2"}, - "/folder1": {"/folder1/file1"}, - "/folder2": {"/folder2/file1"}, - }, - } - skipFile := "/folder1/file1" - expected := []string{ - "/file1", "/folder1/file1", - } - - var walked []string - err := WalkFilesFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { - fmt.Println("Walk ", fileInfo.Path()) - walked = append(walked, fileInfo.Path()) - if fileInfo.Path() == skipFile { - return ErrSkipDir - } - return nil - }) - if err == nil { - t.Fatalf("expected Walk to ErrSkipDir %v", err) - } - compareWalked(t, expected, walked) -} - func compareWalked(t *testing.T, expected, walked []string) { if len(walked) != len(expected) { t.Fatalf("Mismatch number of fileInfo walked %d expected %d", len(walked), len(expected)) diff --git a/registry/storage/linkedblobstore.go b/registry/storage/linkedblobstore.go index 55992d00d..7f6ed18e1 100644 --- a/registry/storage/linkedblobstore.go +++ b/registry/storage/linkedblobstore.go @@ -247,7 +247,7 @@ func (lbs *linkedBlobStore) Enumerate(ctx context.Context, ingestor func(digest. if err != nil { return err } - return lbs.driver.WalkFiles(ctx, rootPath, func(fileInfo driver.FileInfo) error { + return lbs.driver.Walk(ctx, rootPath, func(fileInfo driver.FileInfo) error { filePath := fileInfo.Path() // check if it's a link From 8b726cc377d911023f0fb5e7576c9000256b7397 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 12:31:47 -0400 Subject: [PATCH 10/22] storagedriver/s3: Reverting a few changes from previous WalkFiles impl that was left in. --- registry/storage/blobstore.go | 5 +++++ registry/storage/driver/azure/azure.go | 2 +- registry/storage/driver/filesystem/driver.go | 2 +- registry/storage/driver/inmemory/driver.go | 2 +- registry/storage/driver/swift/swift.go | 2 +- registry/storage/linkedblobstore.go | 4 ++++ 6 files changed, 13 insertions(+), 4 deletions(-) diff --git a/registry/storage/blobstore.go b/registry/storage/blobstore.go index d8db5b417..1008aad8c 100644 --- a/registry/storage/blobstore.go +++ b/registry/storage/blobstore.go @@ -94,6 +94,11 @@ func (bs *blobStore) Enumerate(ctx context.Context, ingester func(dgst digest.Di } return bs.driver.Walk(ctx, specPath, func(fileInfo driver.FileInfo) error { + // skip directories + if fileInfo.IsDir() { + return nil + } + currentPath := fileInfo.Path() // we only want to parse paths that end with /data _, fileName := path.Split(currentPath) diff --git a/registry/storage/driver/azure/azure.go b/registry/storage/driver/azure/azure.go index f7866825c..58c6293b2 100644 --- a/registry/storage/driver/azure/azure.go +++ b/registry/storage/driver/azure/azure.go @@ -360,7 +360,7 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file and directory +// from the given path, calling f on each file func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } diff --git a/registry/storage/driver/filesystem/driver.go b/registry/storage/driver/filesystem/driver.go index 73d379656..8fc9d1ca0 100644 --- a/registry/storage/driver/filesystem/driver.go +++ b/registry/storage/driver/filesystem/driver.go @@ -290,7 +290,7 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file and directory +// from the given path, calling f on each file func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } diff --git a/registry/storage/driver/inmemory/driver.go b/registry/storage/driver/inmemory/driver.go index b875ebe48..4b1f5f48d 100644 --- a/registry/storage/driver/inmemory/driver.go +++ b/registry/storage/driver/inmemory/driver.go @@ -245,7 +245,7 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file and directory +// from the given path, calling f on each file func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } diff --git a/registry/storage/driver/swift/swift.go b/registry/storage/driver/swift/swift.go index 89ae69af4..b64e3b66c 100644 --- a/registry/storage/driver/swift/swift.go +++ b/registry/storage/driver/swift/swift.go @@ -658,7 +658,7 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int } // Walk traverses a filesystem defined within driver, starting -// from the given path, calling f on each file and directory +// from the given path, calling f on each file func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { return storagedriver.WalkFallback(ctx, d, path, f) } diff --git a/registry/storage/linkedblobstore.go b/registry/storage/linkedblobstore.go index 7f6ed18e1..3e3fffe19 100644 --- a/registry/storage/linkedblobstore.go +++ b/registry/storage/linkedblobstore.go @@ -248,6 +248,10 @@ func (lbs *linkedBlobStore) Enumerate(ctx context.Context, ingestor func(digest. return err } return lbs.driver.Walk(ctx, rootPath, func(fileInfo driver.FileInfo) error { + // exit early if directory... + if fileInfo.IsDir() { + return nil + } filePath := fileInfo.Path() // check if it's a link From aea873cc9278a6b70b0780e424863d6add13a9c5 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 13:10:07 -0400 Subject: [PATCH 11/22] storagedriver/s3: test refactor. fixed a bug in WalkFallback preventing ErrSkipDir from stopping gracefully --- registry/storage/driver/walk.go | 21 +-- registry/storage/driver/walk_test.go | 190 ++++++++++++--------------- 2 files changed, 97 insertions(+), 114 deletions(-) diff --git a/registry/storage/driver/walk.go b/registry/storage/driver/walk.go index 8ded5b8be..66c381b17 100644 --- a/registry/storage/driver/walk.go +++ b/registry/storage/driver/walk.go @@ -22,9 +22,14 @@ type WalkFn func(fileInfo FileInfo) error // to a directory, the directory will not be entered and Walk // will continue the traversal. If fileInfo refers to a normal file, processing stops func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { + err, _ := doWalkFallback(ctx, driver, from, f) + return err +} + +func doWalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) (error, bool) { children, err := driver.List(ctx, from) if err != nil { - return err + return err, false } sort.Stable(sort.StringSlice(children)) for _, child := range children { @@ -40,22 +45,22 @@ func WalkFallback(ctx context.Context, driver StorageDriver, from string, f Walk logrus.WithField("path", child).Infof("ignoring deleted path") continue default: - return err + return err, false } } err = f(fileInfo) if err == nil && fileInfo.IsDir() { - if err := WalkFallback(ctx, driver, child, f); err != nil { - return err + if err, ok := doWalkFallback(ctx, driver, child, f); err != nil || !ok { + return err, ok } } else if err == ErrSkipDir { - // Stop iteration if it's a file, otherwise noop if it's a directory + // noop for folders, will just skip if !fileInfo.IsDir() { - return nil + return nil, false // no error but stop iteration } } else if err != nil { - return err + return err, false } } - return nil + return nil, true } diff --git a/registry/storage/driver/walk_test.go b/registry/storage/driver/walk_test.go index c4abd9d06..2a3a4fb2b 100644 --- a/registry/storage/driver/walk_test.go +++ b/registry/storage/driver/walk_test.go @@ -2,7 +2,6 @@ package driver import ( "context" - "errors" "fmt" "strings" "testing" @@ -14,10 +13,10 @@ type changingFileSystem struct { keptFiles map[string]bool } -func (cfs *changingFileSystem) List(ctx context.Context, path string) ([]string, error) { +func (cfs *changingFileSystem) List(_ context.Context, _ string) ([]string, error) { return cfs.fileset, nil } -func (cfs *changingFileSystem) Stat(ctx context.Context, path string) (FileInfo, error) { +func (cfs *changingFileSystem) Stat(_ context.Context, path string) (FileInfo, error) { kept, ok := cfs.keptFiles[path] if ok && kept { return &FileInfoInternal{ @@ -31,6 +30,7 @@ func (cfs *changingFileSystem) Stat(ctx context.Context, path string) (FileInfo, type fileSystem struct { StorageDriver + // maps folder to list results fileset map[string][]string } @@ -81,120 +81,98 @@ func TestWalkFallback(t *testing.T) { "/folder2": {"/folder2/file1"}, }, } - expected := []string{ - "/file1", - "/folder1", - "/folder1/file1", - "/folder2", - "/folder2/file1", - } + noopFn := func(fileInfo FileInfo) error { return nil } - var walked []string - err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { - if fileInfo.IsDir() != d.isDir(fileInfo.Path()) { - t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir()) - } - walked = append(walked, fileInfo.Path()) - return nil - }) - if err != nil { - t.Fatalf(err.Error()) - } - compareWalked(t, expected, walked) -} - -// Walk is expected to skip directory on ErrSkipDir -func TestWalkFallbackSkipDirOnDir(t *testing.T) { - d := &fileSystem{ - fileset: map[string][]string{ - "/": {"/file1", "/folder1", "/folder2"}, - "/folder1": {"/folder1/file1"}, - "/folder2": {"/folder2/file1"}, + tcs := []struct { + name string + fn WalkFn + from string + expected []string + err bool + }{ + { + name: "walk all", + fn: noopFn, + expected: []string{ + "/file1", + "/folder1", + "/folder1/file1", + "/folder2", + "/folder2/file1", + }, + }, + { + name: "skip directory", + fn: func(fileInfo FileInfo) error { + if fileInfo.Path() == "/folder1" { + return ErrSkipDir + } + if strings.Contains(fileInfo.Path(), "/folder1") { + t.Fatalf("skipped dir %s and should not walk %s", "/folder1", fileInfo.Path()) + } + return nil + }, + expected: []string{ + "/file1", + "/folder1", // return ErrSkipDir, skip anything under /folder1 + // skip /folder1/file1 + "/folder2", + "/folder2/file1", + }, + }, + { + name: "stop early", + fn: func(fileInfo FileInfo) error { + if fileInfo.Path() == "/folder1/file1" { + return ErrSkipDir + } + return nil + }, + expected: []string{ + "/file1", + "/folder1", + "/folder1/file1", + // stop early + }, + }, + { + name: "from folder", + fn: noopFn, + expected: []string{ + "/folder1/file1", + }, + from: "/folder1", }, } - skipDir := "/folder1" - expected := []string{ - "/file1", - "/folder1", // return ErrSkipDir, skip anything under /folder1 - // skip /folder1/file1 - "/folder2", - "/folder2/file1", - } - var walked []string - err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { - walked = append(walked, fileInfo.Path()) - if fileInfo.Path() == skipDir { - return ErrSkipDir + for _, tc := range tcs { + var walked []string + if tc.from == "" { + tc.from = "/" } - if strings.Contains(fileInfo.Path(), skipDir) { - t.Fatalf("skipped dir %s and should not walk %s", skipDir, fileInfo.Path()) - } - return nil - }) - if err != nil { - t.Fatalf("expected Walk to not error %v", err) - } - compareWalked(t, expected, walked) -} - -func TestWalkFallbackSkipDirOnFile(t *testing.T) { - d := &fileSystem{ - fileset: map[string][]string{ - "/": {"/file1", "/file2", "/file3"}, - }, - } - skipFile := "/file2" - expected := []string{ - "/file1", - "/file2", // return ErrSkipDir, stop early + t.Run(tc.name, func(t *testing.T) { + err := WalkFallback(context.Background(), d, tc.from, func(fileInfo FileInfo) error { + walked = append(walked, fileInfo.Path()) + if fileInfo.IsDir() != d.isDir(fileInfo.Path()) { + t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir()) + } + return tc.fn(fileInfo) + }) + if tc.err && err == nil { + t.Fatalf("expected err") + } + if !tc.err && err != nil { + t.Fatalf(err.Error()) + } + compareWalked(t, tc.expected, walked) + }) } - var walked []string - err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { - walked = append(walked, fileInfo.Path()) - if fileInfo.Path() == skipFile { - return ErrSkipDir - } - return nil - }) - if err != nil { - t.Fatalf("expected Walk to not error %v", err) - } - compareWalked(t, expected, walked) -} - -// Walk is expected to skip directory on ErrSkipDir -func TestWalkFallbackErr(t *testing.T) { - d := &fileSystem{ - fileset: map[string][]string{ - "/": {"/file1", "/file2", "/file3"}, - }, - } - errFile := "/file2" - expected := []string{ - "/file1", - "/file2", // return ErrSkipDir, stop early - } - expectedErr := errors.New("foo") - - var walked []string - err := WalkFallback(context.Background(), d, "/", func(fileInfo FileInfo) error { - walked = append(walked, fileInfo.Path()) - if fileInfo.Path() == errFile { - return expectedErr - } - return nil - }) - if err != expectedErr { - t.Fatalf("unexpected err %v", err) - } - compareWalked(t, expected, walked) } func compareWalked(t *testing.T, expected, walked []string) { if len(walked) != len(expected) { - t.Fatalf("Mismatch number of fileInfo walked %d expected %d", len(walked), len(expected)) + t.Fatalf("Mismatch number of fileInfo walked %d expected %d; walked %s; expected %s;", len(walked), len(expected), walked, expected) } for i := range walked { if walked[i] != expected[i] { From 847738fd5ed977da7c9135bf757942c5d21f5d56 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 13:16:33 -0400 Subject: [PATCH 12/22] storagedriver/s3: fixed a bug in s3 Walk impl preventing ErrSkipDir from stopping gracefully --- registry/storage/driver/s3-aws/s3.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 3d2c6010d..a728be92a 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1122,6 +1122,7 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre continue } + // walk over file's parent directory if not a duplicate dir := filepath.Dir(walkInfo.Path()) if dir != prevDir { prevDir = dir @@ -1149,7 +1150,8 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre if err != nil { if err == storagedriver.ErrSkipDir { - break + // stop early without return error + return false } retError = err return false From fbb45bd8a09abb870b4c4b2126ac0c0a76d8b1f8 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 15:50:48 -0400 Subject: [PATCH 13/22] storagedriver: Adding an interface conformance test for Walk --- .../storage/driver/conformance/conformance.go | 171 ++++++++++++++++++ registry/storage/driver/s3-aws/s3_test.go | 25 ++- 2 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 registry/storage/driver/conformance/conformance.go diff --git a/registry/storage/driver/conformance/conformance.go b/registry/storage/driver/conformance/conformance.go new file mode 100644 index 000000000..d7b1c7ea2 --- /dev/null +++ b/registry/storage/driver/conformance/conformance.go @@ -0,0 +1,171 @@ +package conformance + +import ( + "context" + "errors" + "fmt" + storagedriver "github.com/docker/distribution/registry/storage/driver" + "strings" + "testing" +) + +var fileset = map[string][]string{ + "/": {"/file1", "/folder1", "/folder2"}, + "/folder1": {"/folder1/file1"}, + "/folder2": {"/folder2/file1"}, +} + +type walkConformance struct { + driver storagedriver.StorageDriver + created []string +} + +func (wc *walkConformance) init() error { + for _, paths := range fileset { + for _, path := range paths { + if _, isDir := fileset[path]; isDir { + continue // skip directories + } + err := wc.driver.PutContent(context.Background(), path, []byte("content "+path)) + if err != nil { + return errors.New(fmt.Sprintf("unable to create file %s: %s", path, err)) + } + wc.created = append(wc.created, path) + } + } + return nil +} + +func (wc *walkConformance) cleanup() error { + var lastError error + for _, path := range wc.created { + err := wc.driver.Delete(context.Background(), path) + if err != nil { + _ = fmt.Errorf("cleanup failed for path %s: %s", path, err) + lastError = err + } + } + return lastError +} + +func (wc *walkConformance) isDir(path string) bool { + _, isDir := fileset[path] + return isDir +} + +func Run(driver storagedriver.StorageDriver, t *testing.T) error { + wc := walkConformance{driver: driver} + defer func() { + err := wc.cleanup() + if err != nil { + t.Fatalf("cleanup failed: %s", err) + } + }() + err := wc.init() + if err != nil { + return err + } + + noopFn := func(fileInfo storagedriver.FileInfo) error { return nil } + + tcs := []struct { + name string + fn storagedriver.WalkFn + from string + expected []string + err bool + }{ + { + name: "walk all", + fn: noopFn, + expected: []string{ + "/", + "/file1", + "/folder1", + "/folder1/file1", + "/folder2", + "/folder2/file1", + }, + }, + { + name: "skip directory", + fn: func(fileInfo storagedriver.FileInfo) error { + if fileInfo.Path() == "/folder1" { + return storagedriver.ErrSkipDir + } + if strings.Contains(fileInfo.Path(), "/folder1") { + t.Fatalf("skipped dir %s and should not walk %s", "/folder1", fileInfo.Path()) + } + return nil + }, + expected: []string{ + "/", + "/file1", + "/folder1", // return ErrSkipDir, skip anything under /folder1 + // skip /folder1/file1 + "/folder2", + "/folder2/file1", + }, + }, + { + name: "stop early", + fn: func(fileInfo storagedriver.FileInfo) error { + if fileInfo.Path() == "/folder1/file1" { + return storagedriver.ErrSkipDir + } + return nil + }, + expected: []string{ + "/", + "/file1", + "/folder1", + "/folder1/file1", + // stop early + }, + }, + { + name: "from folder", + fn: noopFn, + expected: []string{ + "/folder1", + "/folder1/file1", + }, + from: "/folder1", + }, + } + + for _, tc := range tcs { + var walked []string + if tc.from == "" { + tc.from = "/" + } + t.Run(tc.name, func(t *testing.T) { + err := wc.driver.Walk(context.Background(), tc.from, func(fileInfo storagedriver.FileInfo) error { + walked = append(walked, fileInfo.Path()) + if fileInfo.IsDir() != wc.isDir(fileInfo.Path()) { + t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", wc.isDir(fileInfo.Path()), fileInfo.IsDir()) + } + return tc.fn(fileInfo) + }) + if tc.err && err == nil { + t.Fatalf("expected err") + } + if !tc.err && err != nil { + t.Fatalf(err.Error()) + } + compareWalked(t, tc.expected, walked) + }) + } + return nil +} + +func compareWalked(t *testing.T, expected, walked []string) { + if len(walked) != len(expected) { + t.Fatalf("Mismatch number of fileInfo walked %d expected %d; walked %s; expected %s;", len(walked), len(expected), walked, expected) + } + for i := range walked { + if walked[i] != expected[i] { + t.Fatalf("expected walked to come in order expected: walked %s", walked) + } + } +} diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index 482ab3eb4..e1f55b2a0 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -2,6 +2,7 @@ package s3 import ( "bytes" + "github.com/docker/distribution/registry/storage/driver/conformance" "io/ioutil" "math/rand" "os" @@ -106,7 +107,7 @@ func init() { // Skip S3 storage driver tests if environment variable parameters are not provided skipS3 = func() string { - if accessKey == "" || secretKey == "" || region == "" || bucket == "" || encrypt == "" { + if accessKey == "" || secretKey == "" || bucket == "" || encrypt == "" { return "Must set AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, S3_BUCKET, and S3_ENCRYPT to run S3 tests" } return "" @@ -240,6 +241,28 @@ func TestStorageClass(t *testing.T) { } +func TestConformance(t *testing.T) { + if skipS3() != "" { + t.Skip(skipS3()) + } + + rootDir, err := ioutil.TempDir("", "driver-") + if err != nil { + t.Fatalf("unexpected error creating temporary directory: %v", err) + } + defer os.Remove(rootDir) + + standardDriver, err := s3DriverConstructor(rootDir, s3.StorageClassStandard) + if err != nil { + t.Fatalf("unexpected error creating driver with standard storage: %v", err) + } + + err = conformance.Run(standardDriver, t) + if err != nil { + t.Fatalf("conformance failed: %v", err) + } +} + func TestOverThousandBlobs(t *testing.T) { if skipS3() != "" { t.Skip(skipS3()) From 70573f92c701c21a3d664b5da1c640b893bc4294 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 15:52:19 -0400 Subject: [PATCH 14/22] storagedriver: Adding an interface conformance test for Walk, cont. --- registry/storage/driver/conformance/conformance.go | 2 +- registry/storage/driver/s3-aws/s3_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/registry/storage/driver/conformance/conformance.go b/registry/storage/driver/conformance/conformance.go index d7b1c7ea2..06fec2f5e 100644 --- a/registry/storage/driver/conformance/conformance.go +++ b/registry/storage/driver/conformance/conformance.go @@ -53,7 +53,7 @@ func (wc *walkConformance) isDir(path string) bool { return isDir } -func Run(driver storagedriver.StorageDriver, t *testing.T) error { +func TestWalk(driver storagedriver.StorageDriver, t *testing.T) error { wc := walkConformance{driver: driver} defer func() { err := wc.cleanup() diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index e1f55b2a0..8f2b7fed8 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -241,7 +241,7 @@ func TestStorageClass(t *testing.T) { } -func TestConformance(t *testing.T) { +func TestWalk(t *testing.T) { if skipS3() != "" { t.Skip(skipS3()) } @@ -257,7 +257,7 @@ func TestConformance(t *testing.T) { t.Fatalf("unexpected error creating driver with standard storage: %v", err) } - err = conformance.Run(standardDriver, t) + err = conformance.TestWalk(standardDriver, t) if err != nil { t.Fatalf("conformance failed: %v", err) } From b2bdea24aaf30f94726a8fb5f4bcbf211d2fde59 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 16:45:56 -0400 Subject: [PATCH 15/22] storagedriver/s3: Forgoing any interface conformance tests and moved it all into S3 tests. --- .../storage/driver/conformance/conformance.go | 171 ------------------ registry/storage/driver/s3-aws/s3_test.go | 146 ++++++++++++++- 2 files changed, 141 insertions(+), 176 deletions(-) delete mode 100644 registry/storage/driver/conformance/conformance.go diff --git a/registry/storage/driver/conformance/conformance.go b/registry/storage/driver/conformance/conformance.go deleted file mode 100644 index 06fec2f5e..000000000 --- a/registry/storage/driver/conformance/conformance.go +++ /dev/null @@ -1,171 +0,0 @@ -package conformance - -import ( - "context" - "errors" - "fmt" - storagedriver "github.com/docker/distribution/registry/storage/driver" - "strings" - "testing" -) - -var fileset = map[string][]string{ - "/": {"/file1", "/folder1", "/folder2"}, - "/folder1": {"/folder1/file1"}, - "/folder2": {"/folder2/file1"}, -} - -type walkConformance struct { - driver storagedriver.StorageDriver - created []string -} - -func (wc *walkConformance) init() error { - for _, paths := range fileset { - for _, path := range paths { - if _, isDir := fileset[path]; isDir { - continue // skip directories - } - err := wc.driver.PutContent(context.Background(), path, []byte("content "+path)) - if err != nil { - return errors.New(fmt.Sprintf("unable to create file %s: %s", path, err)) - } - wc.created = append(wc.created, path) - } - } - return nil -} - -func (wc *walkConformance) cleanup() error { - var lastError error - for _, path := range wc.created { - err := wc.driver.Delete(context.Background(), path) - if err != nil { - _ = fmt.Errorf("cleanup failed for path %s: %s", path, err) - lastError = err - } - } - return lastError -} - -func (wc *walkConformance) isDir(path string) bool { - _, isDir := fileset[path] - return isDir -} - -func TestWalk(driver storagedriver.StorageDriver, t *testing.T) error { - wc := walkConformance{driver: driver} - defer func() { - err := wc.cleanup() - if err != nil { - t.Fatalf("cleanup failed: %s", err) - } - }() - err := wc.init() - if err != nil { - return err - } - - noopFn := func(fileInfo storagedriver.FileInfo) error { return nil } - - tcs := []struct { - name string - fn storagedriver.WalkFn - from string - expected []string - err bool - }{ - { - name: "walk all", - fn: noopFn, - expected: []string{ - "/", - "/file1", - "/folder1", - "/folder1/file1", - "/folder2", - "/folder2/file1", - }, - }, - { - name: "skip directory", - fn: func(fileInfo storagedriver.FileInfo) error { - if fileInfo.Path() == "/folder1" { - return storagedriver.ErrSkipDir - } - if strings.Contains(fileInfo.Path(), "/folder1") { - t.Fatalf("skipped dir %s and should not walk %s", "/folder1", fileInfo.Path()) - } - return nil - }, - expected: []string{ - "/", - "/file1", - "/folder1", // return ErrSkipDir, skip anything under /folder1 - // skip /folder1/file1 - "/folder2", - "/folder2/file1", - }, - }, - { - name: "stop early", - fn: func(fileInfo storagedriver.FileInfo) error { - if fileInfo.Path() == "/folder1/file1" { - return storagedriver.ErrSkipDir - } - return nil - }, - expected: []string{ - "/", - "/file1", - "/folder1", - "/folder1/file1", - // stop early - }, - }, - { - name: "from folder", - fn: noopFn, - expected: []string{ - "/folder1", - "/folder1/file1", - }, - from: "/folder1", - }, - } - - for _, tc := range tcs { - var walked []string - if tc.from == "" { - tc.from = "/" - } - t.Run(tc.name, func(t *testing.T) { - err := wc.driver.Walk(context.Background(), tc.from, func(fileInfo storagedriver.FileInfo) error { - walked = append(walked, fileInfo.Path()) - if fileInfo.IsDir() != wc.isDir(fileInfo.Path()) { - t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", wc.isDir(fileInfo.Path()), fileInfo.IsDir()) - } - return tc.fn(fileInfo) - }) - if tc.err && err == nil { - t.Fatalf("expected err") - } - if !tc.err && err != nil { - t.Fatalf(err.Error()) - } - compareWalked(t, tc.expected, walked) - }) - } - return nil -} - -func compareWalked(t *testing.T, expected, walked []string) { - if len(walked) != len(expected) { - t.Fatalf("Mismatch number of fileInfo walked %d expected %d; walked %s; expected %s;", len(walked), len(expected), walked, expected) - } - for i := range walked { - if walked[i] != expected[i] { - t.Fatalf("expected walked to come in order expected: walked %s", walked) - } - } -} diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index 8f2b7fed8..3385ac2c0 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -2,11 +2,12 @@ package s3 import ( "bytes" - "github.com/docker/distribution/registry/storage/driver/conformance" + "fmt" "io/ioutil" "math/rand" "os" "strconv" + "strings" "testing" "gopkg.in/check.v1" @@ -252,14 +253,138 @@ func TestWalk(t *testing.T) { } defer os.Remove(rootDir) - standardDriver, err := s3DriverConstructor(rootDir, s3.StorageClassStandard) + driver, err := s3DriverConstructor(rootDir, s3.StorageClassStandard) if err != nil { t.Fatalf("unexpected error creating driver with standard storage: %v", err) } - err = conformance.TestWalk(standardDriver, t) - if err != nil { - t.Fatalf("conformance failed: %v", err) + var fileset = map[string][]string{ + "/": {"/file1", "/folder1", "/folder2"}, + "/folder1": {"/folder1/file1"}, + "/folder2": {"/folder2/file1"}, + } + isDir := func(path string) bool { + _, isDir := fileset[path] + return isDir + } + + var created []string + for _, paths := range fileset { + for _, path := range paths { + if _, isDir := fileset[path]; isDir { + continue // skip directories + } + err := driver.PutContent(context.Background(), path, []byte("content "+path)) + if err != nil { + fmt.Printf("unable to create file %s: %s\n", path, err) + } + created = append(created, path) + } + } + + defer func() { + var lastErr error + for _, path := range created { + err := driver.Delete(context.Background(), path) + if err != nil { + _ = fmt.Errorf("cleanup failed for path %s: %s", path, err) + lastErr = err + } + } + if lastErr != nil { + t.Fatalf("cleanup failed: %s", err) + } + }() + + noopFn := func(fileInfo storagedriver.FileInfo) error { return nil } + + tcs := []struct { + name string + fn storagedriver.WalkFn + from string + expected []string + err bool + }{ + { + name: "walk all", + fn: noopFn, + expected: []string{ + "/", + "/file1", + "/folder1", + "/folder1/file1", + "/folder2", + "/folder2/file1", + }, + }, + { + name: "skip directory", + fn: func(fileInfo storagedriver.FileInfo) error { + if fileInfo.Path() == "/folder1" { + return storagedriver.ErrSkipDir + } + if strings.Contains(fileInfo.Path(), "/folder1") { + t.Fatalf("skipped dir %s and should not walk %s", "/folder1", fileInfo.Path()) + } + return nil + }, + expected: []string{ + "/", + "/file1", + "/folder1", // return ErrSkipDir, skip anything under /folder1 + // skip /folder1/file1 + "/folder2", + "/folder2/file1", + }, + }, + { + name: "stop early", + fn: func(fileInfo storagedriver.FileInfo) error { + if fileInfo.Path() == "/folder1/file1" { + return storagedriver.ErrSkipDir + } + return nil + }, + expected: []string{ + "/", + "/file1", + "/folder1", + "/folder1/file1", + // stop early + }, + }, + { + name: "from folder", + fn: noopFn, + expected: []string{ + "/folder1", + "/folder1/file1", + }, + from: "/folder1", + }, + } + + for _, tc := range tcs { + var walked []string + if tc.from == "" { + tc.from = "/" + } + t.Run(tc.name, func(t *testing.T) { + err := driver.Walk(context.Background(), tc.from, func(fileInfo storagedriver.FileInfo) error { + walked = append(walked, fileInfo.Path()) + if fileInfo.IsDir() != isDir(fileInfo.Path()) { + t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", isDir(fileInfo.Path()), fileInfo.IsDir()) + } + return tc.fn(fileInfo) + }) + if tc.err && err == nil { + t.Fatalf("expected err") + } + if !tc.err && err != nil { + t.Fatalf(err.Error()) + } + compareWalked(t, tc.expected, walked) + }) } } @@ -349,3 +474,14 @@ func TestMoveWithMultipartCopy(t *testing.T) { t.Fatalf("unexpected error getting content: %v", err) } } + +func compareWalked(t *testing.T, expected, walked []string) { + if len(walked) != len(expected) { + t.Fatalf("Mismatch number of fileInfo walked %d expected %d; walked %s; expected %s;", len(walked), len(expected), walked, expected) + } + for i := range walked { + if walked[i] != expected[i] { + t.Fatalf("expected walked to come in order expected: walked %s", walked) + } + } +} From 05a2d10b761e8d931abda03e83dbfb36718bcf5e Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 16:47:51 -0400 Subject: [PATCH 16/22] storagedriver/s3: Cleaning up tests. --- registry/storage/driver/s3-aws/s3_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index 3385ac2c0..a1347c472 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -282,6 +282,7 @@ func TestWalk(t *testing.T) { } } + // cleanup defer func() { var lastErr error for _, path := range created { @@ -296,8 +297,6 @@ func TestWalk(t *testing.T) { } }() - noopFn := func(fileInfo storagedriver.FileInfo) error { return nil } - tcs := []struct { name string fn storagedriver.WalkFn @@ -307,7 +306,7 @@ func TestWalk(t *testing.T) { }{ { name: "walk all", - fn: noopFn, + fn: func(fileInfo storagedriver.FileInfo) error { return nil }, expected: []string{ "/", "/file1", @@ -355,7 +354,7 @@ func TestWalk(t *testing.T) { }, { name: "from folder", - fn: noopFn, + fn: func(fileInfo storagedriver.FileInfo) error { return nil }, expected: []string{ "/folder1", "/folder1/file1", From 9a4da20cf1480455ca25c7e615f5c7046fc22620 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Mon, 28 Jun 2021 16:53:33 -0400 Subject: [PATCH 17/22] storagedriver/s3: Reverting unintentional change. --- registry/storage/driver/s3-aws/s3_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index a1347c472..aea26658e 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -108,7 +108,7 @@ func init() { // Skip S3 storage driver tests if environment variable parameters are not provided skipS3 = func() string { - if accessKey == "" || secretKey == "" || bucket == "" || encrypt == "" { + if accessKey == "" || secretKey == "" || region == "" || bucket == "" || encrypt == "" { return "Must set AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, S3_BUCKET, and S3_ENCRYPT to run S3 tests" } return "" From 9618ba723151fa7a36a05ff03a84f5a7088e3fb9 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Tue, 29 Jun 2021 08:11:01 -0400 Subject: [PATCH 18/22] storagedriver/s3: Added Walk test case for dealing with errors --- registry/storage/driver/s3-aws/s3_test.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index aea26658e..390922761 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -2,6 +2,7 @@ package s3 import ( "bytes" + "errors" "fmt" "io/ioutil" "math/rand" @@ -268,6 +269,7 @@ func TestWalk(t *testing.T) { return isDir } + // create file structure matching fileset above var created []string for _, paths := range fileset { for _, path := range paths { @@ -351,6 +353,17 @@ func TestWalk(t *testing.T) { "/folder1/file1", // stop early }, + err: false, + }, + { + name: "error", + fn: func(fileInfo storagedriver.FileInfo) error { + return errors.New("foo") + }, + expected: []string{ + "/", + }, + err: true, }, { name: "from folder", From 8d38cde0f3b3bd3fcb5023967c1bff4a5543566e Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Tue, 29 Jun 2021 08:34:15 -0400 Subject: [PATCH 19/22] storagedriver/s3: Updating comments --- registry/storage/driver/s3-aws/s3.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index a728be92a..d5f42318a 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1102,6 +1102,16 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre ctx, done := dcontext.WithTrace(parentCtx) defer done("s3aws.ListObjectsV2Pages(%s)", path) + + // The s3 list object API will list all objects under the starting path recursively, in sorted, + // depth-first ordering https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html + // S3 does not return directory without the "delimiter":"/" argument passed, but they can to be + // inferred from object paths and then de-duplicated to avoid calling the walk fn redundantly. + // With files returned in sorted depth-first order, directories are inferred in the same order, + // so avoiding redundant directories is a matter of not passing the same directory to the walk fn twice in a row. + // ErrSkipDir is handled by explicitly skipping over any files under the skipped directory. This may sub-optimal + // for extreme edge cases but for the general use case in a registry, this is orders of magnitude + // faster than a more explicit recursive implementation. listObjectErr := s.ListObjectsV2PagesWithContext(ctx, listObjectsInput, func(objects *s3.ListObjectsV2Output, lastPage bool) bool { walkInfos := make([]storagedriver.FileInfoInternal, 0, len(objects.Contents)) From b9b0cac1222f3df238f363e93873b2ff0a37e41d Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Tue, 29 Jun 2021 10:29:05 -0400 Subject: [PATCH 20/22] storagedriver/s3: Major change to the S3 Walk impl to infer directories to walk between files. This is needed for manifest enumeration among others --- registry/storage/driver/s3-aws/s3.go | 80 ++++++++++++++++------- registry/storage/driver/s3-aws/s3_test.go | 64 +++++++++--------- 2 files changed, 88 insertions(+), 56 deletions(-) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index d5f42318a..8029490e4 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1091,6 +1091,7 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre // the most recent skip directory to avoid walking over undesirable files prevSkipDir string ) + prevDir = prefix + path listObjectsInput := &s3.ListObjectsV2Input{ Bucket: aws.String(d.Bucket), @@ -1116,12 +1117,28 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre walkInfos := make([]storagedriver.FileInfoInternal, 0, len(objects.Contents)) for _, file := range objects.Contents { + filePath := strings.Replace(*file.Key, d.s3Path(""), prefix, 1) + + // get a list of all inferred directories skipped between the previous directory and this file + dirs := directoryDiff(prevDir, filePath) + if len(dirs) > 0 { + for _, dir := range dirs { + walkInfos = append(walkInfos, storagedriver.FileInfoInternal{ + FileInfoFields: storagedriver.FileInfoFields{ + IsDir: true, + Path: dir, + }, + }) + prevDir = dir + } + } + walkInfos = append(walkInfos, storagedriver.FileInfoInternal{ FileInfoFields: storagedriver.FileInfoFields{ IsDir: false, Size: *file.Size, ModTime: *file.LastModified, - Path: strings.Replace(*file.Key, d.s3Path(""), prefix, 1), + Path: filePath, }, }) } @@ -1132,35 +1149,16 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre continue } - // walk over file's parent directory if not a duplicate - dir := filepath.Dir(walkInfo.Path()) - if dir != prevDir { - prevDir = dir - walkDirInfo := storagedriver.FileInfoInternal{ - FileInfoFields: storagedriver.FileInfoFields{ - IsDir: true, - Path: dir, - }, - } - err := f(walkDirInfo) - *objectCount++ - - if err != nil { - if err == storagedriver.ErrSkipDir { - prevSkipDir = dir - continue - } - retError = err - return false - } - } - err := f(walkInfo) *objectCount++ if err != nil { if err == storagedriver.ErrSkipDir { - // stop early without return error + if walkInfo.IsDir() { + prevSkipDir = walkInfo.Path() + continue + } + // is file, stop gracefully return false } retError = err @@ -1181,6 +1179,38 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre return nil } +// directoryDiff finds all directories that are not in common between +// the previous and current paths in sorted order. +// +// Eg 1 directoryDiff("/path/to/folder", "/path/to/folder/folder/file") +// => [ "/path/to/folder/folder" ], +// Eg 2 directoryDiff("/path/to/folder/folder1", "/path/to/folder/folder2/file") +// => [ "/path/to/folder/folder2" ] +// Eg 3 directoryDiff("/path/to/folder/folder1/file", "/path/to/folder/folder2/file") +// => [ "/path/to/folder/folder2" ] +// Eg 4 directoryDiff("/path/to/folder/folder1/file", "/path/to/folder/folder2/folder1/file") +// => [ "/path/to/folder/folder2", "/path/to/folder/folder2/folder1" ] +// Eg 5 directoryDiff("/", "/path/to/folder/folder/file") +// => [ "/path", "/path/to", "/path/to/folder", "/path/to/folder/folder" ], +func directoryDiff(prev, current string) []string { + var parents []string + + if prev == "" || current == "" { + return parents + } + + parent := current + for { + parent = filepath.Dir(parent) + if parent == "/" || parent == prev || strings.HasPrefix(prev, parent) { + break + } + parents = append(parents, parent) + } + sort.Sort(sort.StringSlice(parents)) + return parents +} + func (d *driver) s3Path(path string) string { return strings.TrimLeft(strings.TrimRight(d.RootDirectory, "/")+path, "/") } diff --git a/registry/storage/driver/s3-aws/s3_test.go b/registry/storage/driver/s3-aws/s3_test.go index 390922761..4c80863a4 100644 --- a/registry/storage/driver/s3-aws/s3_test.go +++ b/registry/storage/driver/s3-aws/s3_test.go @@ -259,29 +259,24 @@ func TestWalk(t *testing.T) { t.Fatalf("unexpected error creating driver with standard storage: %v", err) } - var fileset = map[string][]string{ - "/": {"/file1", "/folder1", "/folder2"}, - "/folder1": {"/folder1/file1"}, - "/folder2": {"/folder2/file1"}, - } - isDir := func(path string) bool { - _, isDir := fileset[path] - return isDir + var fileset = []string{ + "/file1", + "/folder1/file1", + "/folder2/file1", + "/folder3/subfolder1/subfolder1/file1", + "/folder3/subfolder2/subfolder1/file1", + "/folder4/file1", } // create file structure matching fileset above var created []string - for _, paths := range fileset { - for _, path := range paths { - if _, isDir := fileset[path]; isDir { - continue // skip directories - } - err := driver.PutContent(context.Background(), path, []byte("content "+path)) - if err != nil { - fmt.Printf("unable to create file %s: %s\n", path, err) - } - created = append(created, path) + for _, path := range fileset { + err := driver.PutContent(context.Background(), path, []byte("content "+path)) + if err != nil { + fmt.Printf("unable to create file %s: %s\n", path, err) + continue } + created = append(created, path) } // cleanup @@ -310,32 +305,43 @@ func TestWalk(t *testing.T) { name: "walk all", fn: func(fileInfo storagedriver.FileInfo) error { return nil }, expected: []string{ - "/", "/file1", "/folder1", "/folder1/file1", "/folder2", "/folder2/file1", + "/folder3", + "/folder3/subfolder1", + "/folder3/subfolder1/subfolder1", + "/folder3/subfolder1/subfolder1/file1", + "/folder3/subfolder2", + "/folder3/subfolder2/subfolder1", + "/folder3/subfolder2/subfolder1/file1", + "/folder4", + "/folder4/file1", }, }, { name: "skip directory", fn: func(fileInfo storagedriver.FileInfo) error { - if fileInfo.Path() == "/folder1" { + if fileInfo.Path() == "/folder3" { return storagedriver.ErrSkipDir } - if strings.Contains(fileInfo.Path(), "/folder1") { - t.Fatalf("skipped dir %s and should not walk %s", "/folder1", fileInfo.Path()) + if strings.Contains(fileInfo.Path(), "/folder3") { + t.Fatalf("skipped dir %s and should not walk %s", "/folder3", fileInfo.Path()) } return nil }, expected: []string{ - "/", "/file1", - "/folder1", // return ErrSkipDir, skip anything under /folder1 - // skip /folder1/file1 + "/folder1", + "/folder1/file1", "/folder2", "/folder2/file1", + "/folder3", + // folder 3 contents skipped + "/folder4", + "/folder4/file1", }, }, { @@ -347,7 +353,6 @@ func TestWalk(t *testing.T) { return nil }, expected: []string{ - "/", "/file1", "/folder1", "/folder1/file1", @@ -361,7 +366,7 @@ func TestWalk(t *testing.T) { return errors.New("foo") }, expected: []string{ - "/", + "/file1", }, err: true, }, @@ -384,9 +389,6 @@ func TestWalk(t *testing.T) { t.Run(tc.name, func(t *testing.T) { err := driver.Walk(context.Background(), tc.from, func(fileInfo storagedriver.FileInfo) error { walked = append(walked, fileInfo.Path()) - if fileInfo.IsDir() != isDir(fileInfo.Path()) { - t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", isDir(fileInfo.Path()), fileInfo.IsDir()) - } return tc.fn(fileInfo) }) if tc.err && err == nil { @@ -493,7 +495,7 @@ func compareWalked(t *testing.T, expected, walked []string) { } for i := range walked { if walked[i] != expected[i] { - t.Fatalf("expected walked to come in order expected: walked %s", walked) + t.Fatalf("walked in unexpected order: expected %s; walked %s", expected, walked) } } } From d257d6ce28d3e5a56591e554ece7c787f7436ba2 Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Tue, 29 Jun 2021 17:13:45 -0400 Subject: [PATCH 21/22] storagedriver/s3: Updating the directoryDiff naming and replace sort with reverse --- registry/storage/driver/s3-aws/s3.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 8029490e4..153123da5 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1193,10 +1193,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre // Eg 5 directoryDiff("/", "/path/to/folder/folder/file") // => [ "/path", "/path/to", "/path/to/folder", "/path/to/folder/folder" ], func directoryDiff(prev, current string) []string { - var parents []string + var paths []string if prev == "" || current == "" { - return parents + return paths } parent := current @@ -1205,10 +1205,16 @@ func directoryDiff(prev, current string) []string { if parent == "/" || parent == prev || strings.HasPrefix(prev, parent) { break } - parents = append(parents, parent) + paths = append(paths, parent) + } + reverse(paths) + return paths +} + +func reverse(s []string) { + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] } - sort.Sort(sort.StringSlice(parents)) - return parents } func (d *driver) s3Path(path string) string { From dec90d3b1aa8efd43cbc8ed5e08fa6879223b59f Mon Sep 17 00:00:00 2001 From: Collin Shoop Date: Wed, 30 Jun 2021 08:42:52 -0400 Subject: [PATCH 22/22] storagedriver/s3: More comment wording --- registry/storage/driver/s3-aws/s3.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 153123da5..187f7a5de 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1104,13 +1104,13 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre ctx, done := dcontext.WithTrace(parentCtx) defer done("s3aws.ListObjectsV2Pages(%s)", path) - // The s3 list object API will list all objects under the starting path recursively, in sorted, - // depth-first ordering https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html - // S3 does not return directory without the "delimiter":"/" argument passed, but they can to be - // inferred from object paths and then de-duplicated to avoid calling the walk fn redundantly. - // With files returned in sorted depth-first order, directories are inferred in the same order, - // so avoiding redundant directories is a matter of not passing the same directory to the walk fn twice in a row. - // ErrSkipDir is handled by explicitly skipping over any files under the skipped directory. This may sub-optimal + // When the "delimiter" argument is omitted, the S3 list API will list all objects in the bucket + // recursively, omitting directory paths. Objects are listed in sorted, depth-first order so we + // can infer all the directories by comparing each object path to the last one we saw. + // See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html + + // With files returned in sorted depth-first order, directories are inferred in the same order. + // ErrSkipDir is handled by explicitly skipping over any files under the skipped directory. This may be sub-optimal // for extreme edge cases but for the general use case in a registry, this is orders of magnitude // faster than a more explicit recursive implementation. listObjectErr := s.ListObjectsV2PagesWithContext(ctx, listObjectsInput, func(objects *s3.ListObjectsV2Output, lastPage bool) bool { @@ -1119,7 +1119,7 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre for _, file := range objects.Contents { filePath := strings.Replace(*file.Key, d.s3Path(""), prefix, 1) - // get a list of all inferred directories skipped between the previous directory and this file + // get a list of all inferred directories between the previous directory and this file dirs := directoryDiff(prevDir, filePath) if len(dirs) > 0 { for _, dir := range dirs {