mirror of
https://github.com/distribution/distribution.git
synced 2025-09-16 15:21:37 +00:00
Pass the last paging flag to storage drivers
Storage drivers may be able to take advantage of the hint to start their walk more efficiently. For S3: The API takes a start-after parameter. Registries with many repositories can drastically reduce calls to s3 by telling s3 to only list results lexicographically after the last parameter. For the fallback: We can start deeper in the tree and avoid statting the files and directories before the hint in a walk. For a filesystem this improves performance a little, but many of the API based drivers are currently treated like a filesystem, so this drastically improves the performance of GCP and Azure blob. Signed-off-by: James Hewitt <james.hewitt@uk.ibm.com>
This commit is contained in:
@@ -1040,21 +1040,21 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int
|
||||
|
||||
// Walk traverses a filesystem defined within driver, starting
|
||||
// from the given path, calling f on each file
|
||||
func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error {
|
||||
var objectCount int64
|
||||
if err := d.doWalk(ctx, &objectCount, from, f); err != nil {
|
||||
return err
|
||||
func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn, options ...func(*storagedriver.WalkOptions)) error {
|
||||
walkOptions := &storagedriver.WalkOptions{}
|
||||
for _, o := range options {
|
||||
o(walkOptions)
|
||||
}
|
||||
|
||||
// S3 doesn't have the concept of empty directories, so it'll return path not found if there are no objects
|
||||
if objectCount == 0 {
|
||||
return storagedriver.PathNotFoundError{Path: from}
|
||||
var objectCount int64
|
||||
if err := d.doWalk(ctx, &objectCount, from, walkOptions.StartAfterHint, f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, from string, f storagedriver.WalkFn) error {
|
||||
func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, from string, startAfter string, f storagedriver.WalkFn) error {
|
||||
var (
|
||||
retError error
|
||||
// the most recent directory walked for de-duping
|
||||
@@ -1075,13 +1075,14 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, from stri
|
||||
}
|
||||
|
||||
listObjectsInput := &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(d.Bucket),
|
||||
Prefix: aws.String(d.s3Path(path)),
|
||||
MaxKeys: aws.Int64(listMax),
|
||||
Bucket: aws.String(d.Bucket),
|
||||
Prefix: aws.String(d.s3Path(path)),
|
||||
MaxKeys: aws.Int64(listMax),
|
||||
StartAfter: aws.String(d.s3Path(startAfter)),
|
||||
}
|
||||
|
||||
ctx, done := dcontext.WithTrace(parentCtx)
|
||||
defer done("s3aws.ListObjectsV2Pages(%s)", path)
|
||||
defer done("s3aws.ListObjectsV2PagesWithContext(%s)", listObjectsInput)
|
||||
|
||||
// When the "delimiter" argument is omitted, the S3 list API will list all objects in the bucket
|
||||
// recursively, omitting directory paths. Objects are listed in sorted, depth-first order so we
|
||||
@@ -1133,11 +1134,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, from stri
|
||||
|
||||
if err != nil {
|
||||
if err == storagedriver.ErrSkipDir {
|
||||
if walkInfo.IsDir() {
|
||||
prevSkipDir = walkInfo.Path()
|
||||
continue
|
||||
}
|
||||
// is file, stop gracefully
|
||||
prevSkipDir = walkInfo.Path()
|
||||
continue
|
||||
}
|
||||
if err == storagedriver.ErrFilledBuffer {
|
||||
return false
|
||||
}
|
||||
retError = err
|
||||
@@ -1187,7 +1187,7 @@ func directoryDiff(prev, current string) []string {
|
||||
parent := current
|
||||
for {
|
||||
parent = filepath.Dir(parent)
|
||||
if parent == "/" || parent == prev || strings.HasPrefix(prev, parent) {
|
||||
if parent == "/" || parent == prev || strings.HasPrefix(prev+"/", parent+"/") {
|
||||
break
|
||||
}
|
||||
paths = append(paths, parent)
|
||||
|
@@ -491,6 +491,7 @@ func TestWalk(t *testing.T) {
|
||||
|
||||
fileset := []string{
|
||||
"/file1",
|
||||
"/folder1-suffix/file1",
|
||||
"/folder1/file1",
|
||||
"/folder2/file1",
|
||||
"/folder3/subfolder1/subfolder1/file1",
|
||||
@@ -524,18 +525,23 @@ func TestWalk(t *testing.T) {
|
||||
}
|
||||
}()
|
||||
|
||||
noopFn := func(fileInfo storagedriver.FileInfo) error { return nil }
|
||||
|
||||
tcs := []struct {
|
||||
name string
|
||||
fn storagedriver.WalkFn
|
||||
from string
|
||||
options []func(*storagedriver.WalkOptions)
|
||||
expected []string
|
||||
err bool
|
||||
}{
|
||||
{
|
||||
name: "walk all",
|
||||
fn: func(fileInfo storagedriver.FileInfo) error { return nil },
|
||||
fn: noopFn,
|
||||
expected: []string{
|
||||
"/file1",
|
||||
"/folder1-suffix",
|
||||
"/folder1-suffix/file1",
|
||||
"/folder1",
|
||||
"/folder1/file1",
|
||||
"/folder2",
|
||||
@@ -564,6 +570,8 @@ func TestWalk(t *testing.T) {
|
||||
},
|
||||
expected: []string{
|
||||
"/file1",
|
||||
"/folder1-suffix",
|
||||
"/folder1-suffix/file1",
|
||||
"/folder1",
|
||||
"/folder1/file1",
|
||||
"/folder2",
|
||||
@@ -574,22 +582,101 @@ func TestWalk(t *testing.T) {
|
||||
"/folder4/file1",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "start late without from",
|
||||
fn: noopFn,
|
||||
options: []func(*storagedriver.WalkOptions){
|
||||
storagedriver.WithStartAfterHint("/folder3/subfolder1/subfolder1/file1"),
|
||||
},
|
||||
expected: []string{
|
||||
// start late
|
||||
"/folder3",
|
||||
"/folder3/subfolder2",
|
||||
"/folder3/subfolder2/subfolder1",
|
||||
"/folder3/subfolder2/subfolder1/file1",
|
||||
"/folder4",
|
||||
"/folder4/file1",
|
||||
},
|
||||
err: false,
|
||||
},
|
||||
{
|
||||
name: "start late with from",
|
||||
fn: noopFn,
|
||||
from: "/folder3",
|
||||
options: []func(*storagedriver.WalkOptions){
|
||||
storagedriver.WithStartAfterHint("/folder3/subfolder1/subfolder1/file1"),
|
||||
},
|
||||
expected: []string{
|
||||
// start late
|
||||
"/folder3/subfolder2",
|
||||
"/folder3/subfolder2/subfolder1",
|
||||
"/folder3/subfolder2/subfolder1/file1",
|
||||
},
|
||||
err: false,
|
||||
},
|
||||
{
|
||||
name: "start after from",
|
||||
fn: noopFn,
|
||||
from: "/folder1",
|
||||
options: []func(*storagedriver.WalkOptions){
|
||||
storagedriver.WithStartAfterHint("/folder2"),
|
||||
},
|
||||
expected: []string{},
|
||||
err: false,
|
||||
},
|
||||
{
|
||||
name: "start matches from",
|
||||
fn: noopFn,
|
||||
from: "/folder3",
|
||||
options: []func(*storagedriver.WalkOptions){
|
||||
storagedriver.WithStartAfterHint("/folder3"),
|
||||
},
|
||||
expected: []string{
|
||||
"/folder3/subfolder1",
|
||||
"/folder3/subfolder1/subfolder1",
|
||||
"/folder3/subfolder1/subfolder1/file1",
|
||||
"/folder3/subfolder2",
|
||||
"/folder3/subfolder2/subfolder1",
|
||||
"/folder3/subfolder2/subfolder1/file1",
|
||||
},
|
||||
err: false,
|
||||
},
|
||||
{
|
||||
name: "start doesn't exist",
|
||||
fn: noopFn,
|
||||
from: "/folder3",
|
||||
options: []func(*storagedriver.WalkOptions){
|
||||
storagedriver.WithStartAfterHint("/folder3/notafolder/notafile"),
|
||||
},
|
||||
expected: []string{
|
||||
"/folder3/subfolder1",
|
||||
"/folder3/subfolder1/subfolder1",
|
||||
"/folder3/subfolder1/subfolder1/file1",
|
||||
"/folder3/subfolder2",
|
||||
"/folder3/subfolder2/subfolder1",
|
||||
"/folder3/subfolder2/subfolder1/file1",
|
||||
},
|
||||
err: false,
|
||||
},
|
||||
{
|
||||
name: "stop early",
|
||||
fn: func(fileInfo storagedriver.FileInfo) error {
|
||||
if fileInfo.Path() == "/folder1/file1" {
|
||||
return storagedriver.ErrSkipDir
|
||||
return storagedriver.ErrFilledBuffer
|
||||
}
|
||||
return nil
|
||||
},
|
||||
expected: []string{
|
||||
"/file1",
|
||||
"/folder1-suffix",
|
||||
"/folder1-suffix/file1",
|
||||
"/folder1",
|
||||
"/folder1/file1",
|
||||
// stop early
|
||||
},
|
||||
err: false,
|
||||
},
|
||||
|
||||
{
|
||||
name: "error",
|
||||
fn: func(fileInfo storagedriver.FileInfo) error {
|
||||
@@ -602,7 +689,7 @@ func TestWalk(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "from folder",
|
||||
fn: func(fileInfo storagedriver.FileInfo) error { return nil },
|
||||
fn: noopFn,
|
||||
expected: []string{
|
||||
"/folder1/file1",
|
||||
},
|
||||
@@ -619,7 +706,7 @@ func TestWalk(t *testing.T) {
|
||||
err := drvr.Walk(context.Background(), tc.from, func(fileInfo storagedriver.FileInfo) error {
|
||||
walked = append(walked, fileInfo.Path())
|
||||
return tc.fn(fileInfo)
|
||||
})
|
||||
}, tc.options...)
|
||||
if tc.err && err == nil {
|
||||
t.Fatalf("expected err")
|
||||
}
|
||||
|
Reference in New Issue
Block a user