add concurrency limits for tag lookup and untag

Harbor is using the distribution for it's (harbor-registry) registry component.
The harbor GC will call into the registry to delete the manifest, which in turn
then does a lookup for all tags that reference the deleted manifest.
To find the tag references, the registry will iterate every tag in the repository
and read it's link file to check if it matches the deleted manifest (i.e. to see
if uses the same sha256 digest). So, the more tags in repository, the worse the
performance will be (as there will be more s3 API calls occurring for the tag
directory lookups and tag file reads).

Therefore, we can use concurrent lookup and untag to optimize performance as described in https://github.com/goharbor/harbor/issues/12948.

P.S. This optimization was originally contributed by @Antiarchitect, now I would like to take it over.
Thanks @Antiarchitect's efforts with PR https://github.com/distribution/distribution/pull/3890.

Signed-off-by: Liang Zheng <zhengliang0901@gmail.com>
This commit is contained in:
Liang Zheng
2024-04-18 15:56:26 +08:00
parent a5882d6646
commit a2afe23f38
10 changed files with 153 additions and 24 deletions

View File

@@ -3,6 +3,7 @@ package storage
import (
"context"
"regexp"
"runtime"
"github.com/distribution/distribution/v3"
"github.com/distribution/distribution/v3/registry/storage/cache"
@@ -10,6 +11,10 @@ import (
"github.com/distribution/reference"
)
var (
DefaultConcurrencyLimit = runtime.GOMAXPROCS(0)
)
// registry is the top-level implementation of Registry for use in the storage
// package. All instances should descend from this object.
type registry struct {
@@ -18,6 +23,7 @@ type registry struct {
statter *blobStatter // global statter service.
blobDescriptorCacheProvider cache.BlobDescriptorCacheProvider
deleteEnabled bool
tagLookupConcurrencyLimit int
resumableDigestEnabled bool
blobDescriptorServiceFactory distribution.BlobDescriptorServiceFactory
manifestURLs manifestURLs
@@ -40,6 +46,13 @@ func EnableRedirect(registry *registry) error {
return nil
}
func TagLookupConcurrencyLimit(concurrencyLimit int) RegistryOption {
return func(registry *registry) error {
registry.tagLookupConcurrencyLimit = concurrencyLimit
return nil
}
}
// EnableDelete is a functional option for NewRegistry. It enables deletion on
// the registry.
func EnableDelete(registry *registry) error {
@@ -184,9 +197,14 @@ func (repo *repository) Named() reference.Named {
}
func (repo *repository) Tags(ctx context.Context) distribution.TagService {
limit := DefaultConcurrencyLimit
if repo.tagLookupConcurrencyLimit > 0 {
limit = repo.tagLookupConcurrencyLimit
}
tags := &tagStore{
repository: repo,
blobStore: repo.registry.blobStore,
repository: repo,
blobStore: repo.registry.blobStore,
concurrencyLimit: limit,
}
return tags