This commit is contained in:
vitshev 2025-02-11 12:25:54 +01:00 committed by Vitalii Shevchenko
parent 00efb5e915
commit b087753151
No known key found for this signature in database
GPG Key ID: 8134E7CAB222896C
3 changed files with 313 additions and 29 deletions

View File

@ -8,6 +8,7 @@ import (
"github.com/distribution/distribution/v3/registry/storage"
"github.com/distribution/distribution/v3/registry/storage/driver/factory"
"github.com/distribution/distribution/v3/version"
"github.com/opencontainers/go-digest"
"github.com/spf13/cobra"
)
@ -84,3 +85,115 @@ var GCCmd = &cobra.Command{
}
},
}
func GetUsedBlobs(args []string) (map[storage.UsedBlob]struct{}, []storage.ManifestDel, error) {
config, err := resolveConfiguration(args)
if err != nil {
return nil, nil, fmt.Errorf("configuration error: %v", err)
}
ctx := dcontext.Background()
ctx, err = configureLogging(ctx, config)
if err != nil {
return nil, nil, fmt.Errorf("unable to configure logging with config: %s", err)
}
driver, err := factory.Create(ctx, config.Storage.Type(), config.Storage.Parameters())
if err != nil {
return nil, nil, fmt.Errorf("failed to construct %s driver: %v", config.Storage.Type(), err)
}
registry, err := storage.NewRegistry(ctx, driver)
if err != nil {
return nil, nil, fmt.Errorf("failed to construct registry: %v", err)
}
usedBlobs, manifests, err := storage.GetUsedBlobs(ctx, registry)
if err != nil {
return nil, nil, fmt.Errorf("failed to get used blobs: %v", err)
}
return usedBlobs, manifests, nil
}
func GetBlobs(args []string) (map[digest.Digest]struct{}, error) {
config, err := resolveConfiguration(args)
if err != nil {
return nil, fmt.Errorf("configuration error: %v", err)
}
ctx := dcontext.Background()
ctx, err = configureLogging(ctx, config)
if err != nil {
return nil, fmt.Errorf("unable to configure logging with config: %s", err)
}
driver, err := factory.Create(ctx, config.Storage.Type(), config.Storage.Parameters())
if err != nil {
return nil, fmt.Errorf("failed to construct %s driver: %v", config.Storage.Type(), err)
}
registry, err := storage.NewRegistry(ctx, driver)
if err != nil {
return nil, fmt.Errorf("failed to construct registry: %v", err)
}
blobService := registry.Blobs()
blobs := make(map[digest.Digest]struct{})
err = blobService.Enumerate(ctx, func(dgst digest.Digest) error {
blobs[dgst] = struct{}{}
return nil
})
if err != nil {
return nil, fmt.Errorf("failed to get blobs: %v", err)
}
return blobs, nil
}
// GCCmd is the cobra command that corresponds to the garbage-collect subcommand
var TractoGCCmd = &cobra.Command{
Use: "tracto-gc <config>",
Short: "`tracto-gc` deletes layers not referenced by any manifests",
Long: "`tracto-gc` deletes layers not referenced by any manifests",
Run: func(cmd *cobra.Command, args []string) {
config, err := resolveConfiguration(args)
if err != nil {
fmt.Fprintf(os.Stderr, "configuration error: %v\n", err)
// nolint:errcheck
cmd.Usage()
os.Exit(1)
}
ctx := dcontext.Background()
ctx, err = configureLogging(ctx, config)
if err != nil {
fmt.Fprintf(os.Stderr, "unable to configure logging with config: %s", err)
os.Exit(1)
}
driver, err := factory.Create(ctx, config.Storage.Type(), config.Storage.Parameters())
if err != nil {
fmt.Fprintf(os.Stderr, "failed to construct %s driver: %v", config.Storage.Type(), err)
os.Exit(1)
}
registry, err := storage.NewRegistry(ctx, driver)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to construct registry: %v", err)
os.Exit(1)
}
usedBlobs, manifets, err := storage.GetUsedBlobs(ctx, registry)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err)
os.Exit(1)
}
print(usedBlobs, manifets)
},
}

View File

@ -2,7 +2,6 @@ package storage
import (
"context"
"errors"
"fmt"
"github.com/distribution/distribution/v3"
@ -29,6 +28,11 @@ type ManifestDel struct {
Tags []string
}
type UsedBlob struct {
Digest digest.Digest
Repo string
}
// MarkAndSweep performs a mark and sweep of registry data
func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, opts GCOpts) error {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
@ -38,7 +42,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
// mark
markSet := make(map[digest.Digest]struct{})
deleteLayerSet := make(map[string][]digest.Digest)
//deleteLayerSet := make(map[string][]digest.Digest)
manifestArr := make([]ManifestDel, 0)
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName)
@ -63,6 +67,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator")
}
// Читаем все ревизии
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if opts.RemoveUntagged {
// fetch all tags where this manifest is the latest one
@ -110,24 +115,27 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
return err
}
}
blobService := repository.Blobs(ctx)
layerEnumerator, ok := blobService.(distribution.ManifestEnumerator)
if !ok {
return errors.New("unable to convert BlobService into ManifestEnumerator")
}
// не перебираем слои
// если слоя нет в списке значит он принаджелит антегнутому манифесту
//blobService := repository.Blobs(ctx)
//layerEnumerator, ok := blobService.(distribution.ManifestEnumerator)
//if !ok {
// return errors.New("unable to convert BlobService into ManifestEnumerator")
//}
var deleteLayers []digest.Digest
err = layerEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if _, ok := markSet[dgst]; !ok {
deleteLayers = append(deleteLayers, dgst)
}
return nil
})
if len(deleteLayers) > 0 {
deleteLayerSet[repoName] = deleteLayers
}
//var deleteLayers []digest.Digest
//err = layerEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
// if _, ok := markSet[dgst]; !ok {
// deleteLayers = append(deleteLayers, dgst)
// }
// return nil
//})
//if len(deleteLayers) > 0 {
// deleteLayerSet[repoName] = deleteLayers
//}
return err
})
if err != nil {
return fmt.Errorf("failed to mark: %v", err)
}
@ -168,18 +176,18 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
}
}
for repo, dgsts := range deleteLayerSet {
for _, dgst := range dgsts {
emit("%s: layer link eligible for deletion: %s", repo, dgst)
if opts.DryRun {
continue
}
err = vacuum.RemoveLayer(repo, dgst)
if err != nil {
return fmt.Errorf("failed to delete layer link %s of repo %s: %v", dgst, repo, err)
}
}
}
//for repo, dgsts := range deleteLayerSet {
// for _, dgst := range dgsts {
// emit("%s: layer link eligible for deletion: %s", repo, dgst)
// if opts.DryRun {
// continue
// }
// err = vacuum.RemoveLayer(repo, dgst)
// if err != nil {
// return fmt.Errorf("failed to delete layer link %s of repo %s: %v", dgst, repo, err)
// }
// }
//}
return err
}
@ -220,3 +228,105 @@ func markManifestReferences(dgst digest.Digest, manifestService distribution.Man
}
return nil
}
// GetUsedBlobs возвращает список blobs, которые используются в хранилище
func GetUsedBlobs(ctx context.Context, registry distribution.Namespace) (map[UsedBlob]struct{}, []ManifestDel, error) {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
if !ok {
return nil, nil, fmt.Errorf("unable to convert Namespace to RepositoryEnumerator")
}
// markSet содержит все используемые blobs
markSet := make(map[UsedBlob]struct{})
manifestArr := make([]ManifestDel, 0)
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
named, err := reference.WithName(repoName)
if err != nil {
return fmt.Errorf("failed to parse repo name %s: %v", repoName, err)
}
repository, err := registry.Repository(ctx, named)
if err != nil {
return fmt.Errorf("failed to construct repository: %v", err)
}
manifestService, err := repository.Manifests(ctx)
if err != nil {
return fmt.Errorf("failed to construct manifest service: %v", err)
}
manifestEnumerator, ok := manifestService.(distribution.ManifestEnumerator)
if !ok {
return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator")
}
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
fmt.Printf("%s: marking blobs for %s\n", repoName, dgst)
tags, err := repository.Tags(ctx).Lookup(ctx, v1.Descriptor{Digest: dgst})
if err != nil {
return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err)
}
fmt.Printf("%s: marking blobs for %s tags %s\n", repoName, dgst, tags)
if len(tags) == 0 {
// fetch all tags from repository
// all of these tags could contain manifest in history
// which means that we need check (and delete) those references when deleting manifest
allTags, err := repository.Tags(ctx).All(ctx)
if err != nil {
if _, ok := err.(distribution.ErrRepositoryUnknown); ok {
emit("manifest tags path of repository %s does not exist", repoName)
return nil
}
return fmt.Errorf("failed to retrieve tags %v", err)
}
manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags})
return nil
}
markSet[UsedBlob{
Digest: dgst,
Repo: repoName,
}] = struct{}{}
return markManifestReferences(dgst, manifestService, ctx, func(d digest.Digest) bool {
_, marked := markSet[UsedBlob{
Digest: d,
Repo: repoName,
}]
if !marked {
markSet[UsedBlob{
Digest: dgst,
Repo: repoName,
}] = struct{}{}
}
return marked
})
})
if err != nil {
return err
}
return nil
})
manifestBlobs := make(map[digest.Digest]struct{})
for obj := range markSet {
manifestBlobs[obj.Digest] = struct{}{}
}
manifestArr = unmarkReferencedManifest(manifestArr, manifestBlobs)
if err != nil {
return nil, nil, fmt.Errorf("failed to mark used blobs: %v", err)
}
return markSet, manifestArr, nil
}

View File

@ -923,3 +923,64 @@ func TestTaggedManifestlistWithDeletedReference(t *testing.T) {
t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0)
}
}
func TestGetUsedBlobsReference(t *testing.T) {
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "foo/untaggedlist/deleteref")
_ = uploadRandomSchema2Image(t, repo)
_ = uploadRandomSchema2Image(t, repo)
_ = uploadRandomSchema2Image(t, repo)
uploadRandomSchema2Image(t, repo)
// Run GC
blobs, err := GetUsedBlobs(dcontext.Background(), registry)
if err != nil {
t.Fatalf("Failed get used blobs: %v", err)
}
after := allBlobs(t, registry)
if len(blobs) != len(after) {
t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0)
}
}
func TestGetUsedBlobsReferenceWithoutDeleted(t *testing.T) {
ctx := dcontext.Background()
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "foo/untaggedlist/deleteref")
manifestService, err := repo.Manifests(ctx)
if err != nil {
t.Fatalf("%v", err)
}
image1 := uploadRandomSchema2Image(t, repo)
_ = uploadRandomSchema2Image(t, repo)
_ = uploadRandomSchema2Image(t, repo)
_ = uploadRandomSchema2Image(t, repo)
err = manifestService.Delete(ctx, image1.manifestDigest)
if err != nil {
t.Fatalf("Failed to delete image: %v", err)
}
// Run GC
blobs, err := GetUsedBlobs(dcontext.Background(), registry)
if err != nil {
t.Fatalf("Failed get used blobs: %v", err)
}
after := allBlobs(t, registry)
if len(blobs) != len(after) {
t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0)
}
}