From 3b2e424820d0f6c85d8cb265795820fbcf5a9f73 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 20 Jul 2025 22:02:54 -0700 Subject: [PATCH] Use a standalone table to store deletion files so that all kinds of storage could reuse the deletion infrastructure --- models/db/file_status.go | 12 -- models/fixtures/attachment.yml | 13 -- models/issues/comment.go | 5 +- models/issues/comment_list.go | 6 +- models/issues/comment_test.go | 4 +- models/issues/issue_list.go | 1 - models/migrations/migrations.go | 2 +- models/migrations/v1_25/main_test.go | 14 -- models/migrations/v1_25/v321.go | 73 ++------- models/migrations/v1_25/v321_test.go | 35 ----- models/repo/attachment.go | 181 +++++++--------------- models/repo/release.go | 1 - models/system/storage_cleanup.go | 42 +++++ models/user/main_test.go | 4 +- modules/storage/storage.go | 34 ++++ options/locale/locale_en-US.ini | 2 +- routers/init.go | 4 +- services/attachment/attachment.go | 111 +------------ services/attachment/attachment_test.go | 3 +- services/cron/tasks_extended.go | 10 +- services/doctor/repository.go | 3 +- services/issue/comments.go | 21 ++- services/issue/issue.go | 42 ++--- services/issue/issue_test.go | 14 +- services/issue/main_test.go | 4 +- services/migrations/gitea_uploader.go | 1 - services/release/release.go | 16 +- services/release/release_test.go | 3 +- services/repository/delete.go | 18 ++- services/repository/main_test.go | 4 +- services/storagecleanup/storagecleanup.go | 116 ++++++++++++++ services/user/delete.go | 11 +- services/user/user.go | 10 +- services/user/user_test.go | 4 +- 34 files changed, 365 insertions(+), 459 deletions(-) delete mode 100644 models/db/file_status.go delete mode 100644 models/migrations/v1_25/main_test.go delete mode 100644 models/migrations/v1_25/v321_test.go create mode 100644 models/system/storage_cleanup.go create mode 100644 services/storagecleanup/storagecleanup.go diff --git a/models/db/file_status.go b/models/db/file_status.go deleted file mode 
100644 index 4ed1186fb56..00000000000 --- a/models/db/file_status.go +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2025 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package db - -// FileStatus represents the status of a file in the disk. -type FileStatus int - -const ( - FileStatusNormal FileStatus = iota + 1 // FileStatusNormal indicates the file is normal and exists on disk. - FileStatusToBeDeleted // FileStatusToBeDeleted indicates the file is marked for deletion but still exists on disk. -) diff --git a/models/fixtures/attachment.yml b/models/fixtures/attachment.yml index 06f578f7b7b..b86a15b2826 100644 --- a/models/fixtures/attachment.yml +++ b/models/fixtures/attachment.yml @@ -8,7 +8,6 @@ comment_id: 0 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -22,7 +21,6 @@ comment_id: 0 name: attach2 download_count: 1 - status: 1 size: 0 created_unix: 946684800 @@ -36,7 +34,6 @@ comment_id: 1 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -50,7 +47,6 @@ comment_id: 1 name: attach2 download_count: 1 - status: 1 size: 0 created_unix: 946684800 @@ -64,7 +60,6 @@ comment_id: 0 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -78,7 +73,6 @@ comment_id: 2 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -92,7 +86,6 @@ comment_id: 2 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -106,7 +99,6 @@ comment_id: 0 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -120,7 +112,6 @@ comment_id: 0 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -134,7 +125,6 @@ comment_id: 0 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -148,7 +138,6 @@ comment_id: 0 name: attach1 download_count: 0 - status: 1 size: 0 created_unix: 946684800 @@ -162,7 +151,6 @@ comment_id: 0 name: README.md download_count: 0 - status: 1 size: 0 
created_unix: 946684800 @@ -176,6 +164,5 @@ comment_id: 7 name: code_comment_uploaded_attachment.png download_count: 0 - status: 1 size: 0 created_unix: 946684812 diff --git a/models/issues/comment.go b/models/issues/comment.go index d230b259457..db48e4ffac1 100644 --- a/models/issues/comment.go +++ b/models/issues/comment.go @@ -1116,7 +1116,8 @@ func UpdateComment(ctx context.Context, c *Comment, contentVersion int, doer *us // DeleteComment deletes the comment func DeleteComment(ctx context.Context, comment *Comment) error { - if _, err := db.GetEngine(ctx).ID(comment.ID).NoAutoCondition().Delete(comment); err != nil { + e := db.GetEngine(ctx) + if _, err := e.ID(comment.ID).NoAutoCondition().Delete(comment); err != nil { return err } @@ -1131,7 +1132,7 @@ func DeleteComment(ctx context.Context, comment *Comment) error { return err } } - if _, err := db.GetEngine(ctx).Table("action"). + if _, err := e.Table("action"). Where("comment_id = ?", comment.ID). Update(map[string]any{ "is_deleted": true, diff --git a/models/issues/comment_list.go b/models/issues/comment_list.go index bb95da7710b..f6c485449f6 100644 --- a/models/issues/comment_list.go +++ b/models/issues/comment_list.go @@ -349,10 +349,7 @@ func (comments CommentList) LoadAttachmentsByIssue(ctx context.Context) error { } attachments := make([]*repo_model.Attachment, 0, len(comments)/2) - if err := db.GetEngine(ctx). - Where("issue_id=? AND comment_id>0", comments[0].IssueID). - And("status = ?", db.FileStatusNormal). - Find(&attachments); err != nil { + if err := db.GetEngine(ctx).Where("issue_id=? AND comment_id>0", comments[0].IssueID).Find(&attachments); err != nil { return err } @@ -380,7 +377,6 @@ func (comments CommentList) LoadAttachments(ctx context.Context) (err error) { limit := min(left, db.DefaultMaxInSize) rows, err := db.GetEngine(ctx). In("comment_id", commentsIDs[:limit]). - And("status = ?", db.FileStatusNormal). 
Rows(new(repo_model.Attachment)) if err != nil { return err diff --git a/models/issues/comment_test.go b/models/issues/comment_test.go index 610a75aea60..c08e3b970d3 100644 --- a/models/issues/comment_test.go +++ b/models/issues/comment_test.go @@ -50,9 +50,7 @@ func Test_UpdateCommentAttachment(t *testing.T) { comment := unittest.AssertExistsAndLoadBean(t, &issues_model.Comment{ID: 1}) attachment := repo_model.Attachment{ - Name: "test.txt", - Status: db.FileStatusNormal, - UUID: "test-uuid", + Name: "test.txt", } assert.NoError(t, db.Insert(db.DefaultContext, &attachment)) diff --git a/models/issues/issue_list.go b/models/issues/issue_list.go index 98b0becafd1..26b93189b8b 100644 --- a/models/issues/issue_list.go +++ b/models/issues/issue_list.go @@ -339,7 +339,6 @@ func (issues IssueList) LoadAttachments(ctx context.Context) (err error) { limit := min(left, db.DefaultMaxInSize) rows, err := db.GetEngine(ctx). In("issue_id", issuesIDs[:limit]). - And("status = ?", db.FileStatusNormal). Rows(new(repo_model.Attachment)) if err != nil { return err diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index 1b6ec04d36e..f6a6c9b49aa 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -385,7 +385,7 @@ func prepareMigrationTasks() []*migration { newMigration(320, "Migrate two_factor_policy to login_source table", v1_24.MigrateSkipTwoFactor), // Gitea 1.24.0-rc0 ends at migration ID number 320 (database version 321) - newMigration(321, "Add file status columns to attachment table", v1_25.AddFileStatusToAttachment), + newMigration(321, "Add storage_path_deletion table", v1_25.AddStoragePathDeletion), } return preparedMigrations } diff --git a/models/migrations/v1_25/main_test.go b/models/migrations/v1_25/main_test.go deleted file mode 100644 index d2c4a4105d3..00000000000 --- a/models/migrations/v1_25/main_test.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2025 The Gitea Authors. All rights reserved. 
-// SPDX-License-Identifier: MIT - -package v1_25 - -import ( - "testing" - - "code.gitea.io/gitea/models/migrations/base" -) - -func TestMain(m *testing.M) { - base.MainTest(m) -} diff --git a/models/migrations/v1_25/v321.go b/models/migrations/v1_25/v321.go index 47096f5ad9e..a804f4ea7bf 100644 --- a/models/migrations/v1_25/v321.go +++ b/models/migrations/v1_25/v321.go @@ -4,72 +4,23 @@ package v1_25 import ( - "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/timeutil" "xorm.io/xorm" - "xorm.io/xorm/schemas" ) -type Attachment321 struct { - ID int64 `xorm:"pk autoincr"` - UUID string `xorm:"uuid"` - RepoID int64 // this should not be zero - IssueID int64 // maybe zero when creating - ReleaseID int64 // maybe zero when creating - UploaderID int64 `xorm:"DEFAULT 0"` // Notice: will be zero before this column added - CommentID int64 - Name string - DownloadCount int64 `xorm:"DEFAULT 0"` - Status db.FileStatus `xorm:"DEFAULT 1 NOT NULL"` // 1 = normal, 2 = to be deleted - DeleteFailedCount int `xorm:"DEFAULT 0 NOT NULL"` // Number of times the deletion failed, used to prevent infinite loop - LastDeleteFailedTime timeutil.TimeStamp // Last time the deletion failed, used to prevent infinite loop - Size int64 `xorm:"DEFAULT 0"` - CreatedUnix timeutil.TimeStamp `xorm:"created"` -} - -func (a *Attachment321) TableName() string { - return "attachment" -} - -// TableIndices implements xorm's TableIndices interface -func (a *Attachment321) TableIndices() []*schemas.Index { - uuidIndex := schemas.NewIndex("uuid", schemas.UniqueType) - uuidIndex.AddColumn("uuid") - - repoIndex := schemas.NewIndex("repo_id", schemas.IndexType) - repoIndex.AddColumn("repo_id") - - issueIndex := schemas.NewIndex("issue_id", schemas.IndexType) - issueIndex.AddColumn("issue_id") - - releaseIndex := schemas.NewIndex("release_id", schemas.IndexType) - releaseIndex.AddColumn("release_id") - - uploaderIndex := schemas.NewIndex("uploader_id", schemas.IndexType) - 
uploaderIndex.AddColumn("uploader_id") - - commentIndex := schemas.NewIndex("comment_id", schemas.IndexType) - commentIndex.AddColumn("comment_id") - - statusIndex := schemas.NewIndex("status", schemas.IndexType) - statusIndex.AddColumn("status") - - statusIDIndex := schemas.NewIndex("status_id", schemas.IndexType) - statusIDIndex.AddColumn("status", "id") // For status = ? AND id > ? query - - return []*schemas.Index{ - uuidIndex, - repoIndex, - issueIndex, - releaseIndex, - uploaderIndex, - commentIndex, - statusIndex, - statusIDIndex, +func AddStoragePathDeletion(x *xorm.Engine) error { + // StoragePathDeletion represents a file or directory that is pending deletion. + type StoragePathDeletion struct { + ID int64 + StorageName string // storage name defined in the storage module + PathType int // 1 for file, 2 for directory + RelativePath string `xorm:"TEXT"` + DeleteFailedCount int `xorm:"DEFAULT 0 NOT NULL"` // Number of times the deletion failed, used to prevent infinite loop + LastDeleteFailedReason string `xorm:"TEXT"` // Error message of the last failed deletion attempt + LastDeleteFailedTime timeutil.TimeStamp // Last time the deletion failed, used to prevent infinite loop + CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` } -} -func AddFileStatusToAttachment(x *xorm.Engine) error { - return x.Sync(new(Attachment321)) + return x.Sync(new(StoragePathDeletion)) } diff --git a/models/migrations/v1_25/v321_test.go b/models/migrations/v1_25/v321_test.go deleted file mode 100644 index 47bb9ea2429..00000000000 --- a/models/migrations/v1_25/v321_test.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2025 The Gitea Authors. All rights reserved. 
-// SPDX-License-Identifier: MIT - -package v1_25 - -import ( - "testing" - - "code.gitea.io/gitea/models/migrations/base" - "code.gitea.io/gitea/modules/timeutil" - - "github.com/stretchr/testify/assert" -) - -func Test_AddFileStatusToAttachment(t *testing.T) { - type Attachment struct { - ID int64 `xorm:"pk autoincr"` - UUID string `xorm:"uuid UNIQUE"` - RepoID int64 `xorm:"INDEX"` // this should not be zero - IssueID int64 `xorm:"INDEX"` // maybe zero when creating - ReleaseID int64 `xorm:"INDEX"` // maybe zero when creating - UploaderID int64 `xorm:"INDEX DEFAULT 0"` // Notice: will be zero before this column added - CommentID int64 `xorm:"INDEX"` - Name string - DownloadCount int64 `xorm:"DEFAULT 0"` - Size int64 `xorm:"DEFAULT 0"` - CreatedUnix timeutil.TimeStamp `xorm:"created"` - } - - // Prepare and load the testing database - x, deferable := base.PrepareTestEnv(t, 0, new(Attachment)) - defer deferable() - - assert.NoError(t, AddFileStatusToAttachment(x)) -} diff --git a/models/repo/attachment.go b/models/repo/attachment.go index 86dfea1c2da..b82317d47ba 100644 --- a/models/repo/attachment.go +++ b/models/repo/attachment.go @@ -11,69 +11,27 @@ import ( "path" "code.gitea.io/gitea/models/db" + system_model "code.gitea.io/gitea/models/system" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" - - "xorm.io/xorm/schemas" ) // Attachment represent a attachment of issue/comment/release. 
type Attachment struct { - ID int64 `xorm:"pk autoincr"` - UUID string `xorm:"uuid"` - RepoID int64 // this should not be zero - IssueID int64 // maybe zero when creating - ReleaseID int64 // maybe zero when creating - UploaderID int64 `xorm:"DEFAULT 0"` // Notice: will be zero before this column added - CommentID int64 - Name string - DownloadCount int64 `xorm:"DEFAULT 0"` - Status db.FileStatus `xorm:"DEFAULT 1 NOT NULL"` // 1 = normal, 2 = to be deleted - DeleteFailedCount int `xorm:"DEFAULT 0 NOT NULL"` // Number of times the deletion failed, used to prevent infinite loop - LastDeleteFailedReason string `xorm:"TEXT"` // Last reason the deletion failed, used to prevent infinite loop - LastDeleteFailedTime timeutil.TimeStamp // Last time the deletion failed, used to prevent infinite loop - Size int64 `xorm:"DEFAULT 0"` - CreatedUnix timeutil.TimeStamp `xorm:"created"` - CustomDownloadURL string `xorm:"-"` -} - -// TableIndices implements xorm's TableIndices interface -func (a *Attachment) TableIndices() []*schemas.Index { - uuidIndex := schemas.NewIndex("uuid", schemas.UniqueType) - uuidIndex.AddColumn("uuid") - - repoIndex := schemas.NewIndex("repo_id", schemas.IndexType) - repoIndex.AddColumn("repo_id") - - issueIndex := schemas.NewIndex("issue_id", schemas.IndexType) - issueIndex.AddColumn("issue_id") - - releaseIndex := schemas.NewIndex("release_id", schemas.IndexType) - releaseIndex.AddColumn("release_id") - - uploaderIndex := schemas.NewIndex("uploader_id", schemas.IndexType) - uploaderIndex.AddColumn("uploader_id") - - commentIndex := schemas.NewIndex("comment_id", schemas.IndexType) - commentIndex.AddColumn("comment_id") - - statusIndex := schemas.NewIndex("status", schemas.IndexType) - statusIndex.AddColumn("status") - - statusIDIndex := schemas.NewIndex("status_id", schemas.IndexType) - statusIDIndex.AddColumn("status", "id") // For status = ? AND id > ? 
query - - return []*schemas.Index{ - uuidIndex, - repoIndex, - issueIndex, - releaseIndex, - uploaderIndex, - commentIndex, - statusIndex, - statusIDIndex, - } + ID int64 `xorm:"pk autoincr"` + UUID string `xorm:"uuid UNIQUE"` + RepoID int64 `xorm:"INDEX"` // this should not be zero + IssueID int64 `xorm:"INDEX"` // maybe zero when creating + ReleaseID int64 `xorm:"INDEX"` // maybe zero when creating + UploaderID int64 `xorm:"INDEX DEFAULT 0"` // Notice: will be zero before this column added + CommentID int64 `xorm:"INDEX"` + Name string + DownloadCount int64 `xorm:"DEFAULT 0"` + Size int64 `xorm:"DEFAULT 0"` + CreatedUnix timeutil.TimeStamp `xorm:"created"` + CustomDownloadURL string `xorm:"-"` } func init() { @@ -132,9 +90,7 @@ func (err ErrAttachmentNotExist) Unwrap() error { // GetAttachmentByID returns attachment by given id func GetAttachmentByID(ctx context.Context, id int64) (*Attachment, error) { attach := &Attachment{} - if has, err := db.GetEngine(ctx).ID(id). - And("status = ?", db.FileStatusNormal). - Get(attach); err != nil { + if has, err := db.GetEngine(ctx).ID(id).Get(attach); err != nil { return nil, err } else if !has { return nil, ErrAttachmentNotExist{ID: id, UUID: ""} @@ -145,9 +101,7 @@ func GetAttachmentByID(ctx context.Context, id int64) (*Attachment, error) { // GetAttachmentByUUID returns attachment by given UUID. func GetAttachmentByUUID(ctx context.Context, uuid string) (*Attachment, error) { attach := &Attachment{} - has, err := db.GetEngine(ctx).Where("uuid=?", uuid). - And("status = ?", db.FileStatusNormal). - Get(attach) + has, err := db.GetEngine(ctx).Where("uuid=?", uuid).Get(attach) if err != nil { return nil, err } else if !has { @@ -164,24 +118,18 @@ func GetAttachmentsByUUIDs(ctx context.Context, uuids []string) ([]*Attachment, // Silently drop invalid uuids. attachments := make([]*Attachment, 0, len(uuids)) - return attachments, db.GetEngine(ctx).In("uuid", uuids). - And("status = ?", db.FileStatusNormal). 
- Find(&attachments) + return attachments, db.GetEngine(ctx).In("uuid", uuids).Find(&attachments) } // ExistAttachmentsByUUID returns true if attachment exists with the given UUID func ExistAttachmentsByUUID(ctx context.Context, uuid string) (bool, error) { - return db.GetEngine(ctx).Where("`uuid`=?", uuid). - And("status = ?", db.FileStatusNormal). - Exist(new(Attachment)) + return db.GetEngine(ctx).Where("`uuid`=?", uuid).Exist(new(Attachment)) } // GetAttachmentsByIssueID returns all attachments of an issue. func GetAttachmentsByIssueID(ctx context.Context, issueID int64) ([]*Attachment, error) { attachments := make([]*Attachment, 0, 10) - return attachments, db.GetEngine(ctx).Where("issue_id = ? AND comment_id = 0", issueID). - And("status = ?", db.FileStatusNormal). - Find(&attachments) + return attachments, db.GetEngine(ctx).Where("issue_id = ? AND comment_id = 0", issueID).Find(&attachments) } // GetAttachmentsByIssueIDImagesLatest returns the latest image attachments of an issue. @@ -196,23 +144,19 @@ func GetAttachmentsByIssueIDImagesLatest(ctx context.Context, issueID int64) ([] OR name like '%.jxl' OR name like '%.png' OR name like '%.svg' - OR name like '%.webp')`, issueID). - And("status = ?", db.FileStatusNormal). - Desc("comment_id").Limit(5).Find(&attachments) + OR name like '%.webp')`, issueID).Desc("comment_id").Limit(5).Find(&attachments) } // GetAttachmentsByCommentID returns all attachments if comment by given ID. func GetAttachmentsByCommentID(ctx context.Context, commentID int64) ([]*Attachment, error) { attachments := make([]*Attachment, 0, 10) - return attachments, db.GetEngine(ctx).Where("comment_id=?", commentID). - And("status = ?", db.FileStatusNormal). - Find(&attachments) + return attachments, db.GetEngine(ctx).Where("comment_id=?", commentID).Find(&attachments) } // GetAttachmentByReleaseIDFileName returns attachment by given releaseId and fileName. 
func GetAttachmentByReleaseIDFileName(ctx context.Context, releaseID int64, fileName string) (*Attachment, error) { attach := &Attachment{ReleaseID: releaseID, Name: fileName} - has, err := db.GetEngine(ctx).Where("status = ?", db.FileStatusNormal).Get(attach) + has, err := db.GetEngine(ctx).Get(attach) if err != nil { return nil, err } else if !has { @@ -221,6 +165,41 @@ func GetAttachmentByReleaseIDFileName(ctx context.Context, releaseID int64, file return attach, nil } +// DeleteAttachments delete the given attachments and add disk files to pending deletion +func DeleteAttachments(ctx context.Context, attachments []*Attachment) ([]int64, error) { + if len(attachments) == 0 { + return nil, nil + } + + ids := make([]int64, 0, len(attachments)) + for _, a := range attachments { + ids = append(ids, a.ID) + } + + return db.WithTx2(ctx, func(ctx context.Context) ([]int64, error) { + // delete attachments from database + if _, err := db.GetEngine(ctx).Table("attachment").In("id", ids).Delete(); err != nil { + return nil, err + } + + // add disk files to pending deletion table as well + var deletionIDs []int64 + for _, a := range attachments { + pendingDeletion := &system_model.StoragePathDeletion{ + StorageName: storage.AttachmentStorageName, + PathType: system_model.PathFile, + RelativePath: a.RelativePath(), + } + if err := db.Insert(ctx, pendingDeletion); err != nil { + return nil, fmt.Errorf("insert pending deletion: %w", err) + } + + deletionIDs = append(deletionIDs, pendingDeletion.ID) // Collect pending deletions + } + return deletionIDs, nil + }) +} + // UpdateAttachmentByUUID Updates attachment via uuid func UpdateAttachmentByUUID(ctx context.Context, attach *Attachment, cols ...string) error { if attach.UUID == "" { @@ -243,52 +222,6 @@ func UpdateAttachment(ctx context.Context, atta *Attachment) error { return err } -// MarkAttachmentsDeleted marks the given attachments as deleted -func MarkAttachmentsDeleted(ctx context.Context, attachments []*Attachment) 
(int64, error) { - if len(attachments) == 0 { - return 0, nil - } - - ids := make([]int64, 0, len(attachments)) - for _, a := range attachments { - ids = append(ids, a.ID) - } - - return db.GetEngine(ctx).Table("attachment").In("id", ids).Update(map[string]any{ - "status": db.FileStatusToBeDeleted, - }) -} - -// MarkAttachmentsDeletedByRelease marks all attachments associated with the given release as deleted. -func MarkAttachmentsDeletedByRelease(ctx context.Context, releaseID int64) error { - _, err := db.GetEngine(ctx).Table("attachment").Where("release_id = ?", releaseID).Update(map[string]any{ - "status": db.FileStatusToBeDeleted, - }) - return err -} - -// DeleteMarkedAttachmentByID deletes the attachment which has been marked as deleted by given id -func DeleteMarkedAttachmentByID(ctx context.Context, id int64) error { - cnt, err := db.GetEngine(ctx).ID(id).Where("status = ?", db.FileStatusToBeDeleted).Delete(new(Attachment)) - if err != nil { - return fmt.Errorf("delete attachment by id: %w", err) - } - if cnt != 1 { - return fmt.Errorf("the attachment with id %d was not found or is not marked for deletion", id) - } - return nil -} - -func UpdateMarkedAttachmentFailure(ctx context.Context, attachment *Attachment, err error) error { - attachment.DeleteFailedCount++ - _, updateErr := db.GetEngine(ctx).Table("attachment").ID(attachment.ID).Update(map[string]any{ - "delete_failed_count": attachment.DeleteFailedCount, - "last_delete_failed_reason": err.Error(), - "last_delete_failed_time": timeutil.TimeStampNow(), - }) - return updateErr -} - // CountOrphanedAttachments returns the number of bad attachments func CountOrphanedAttachments(ctx context.Context) (int64, error) { return db.GetEngine(ctx).Where("(issue_id > 0 and issue_id not in (select id from issue)) or (release_id > 0 and release_id not in (select id from `release`))"). 
diff --git a/models/repo/release.go b/models/repo/release.go index b3aae97560c..59f4caf5aa9 100644 --- a/models/repo/release.go +++ b/models/repo/release.go @@ -378,7 +378,6 @@ func GetReleaseAttachments(ctx context.Context, rels ...*Release) (err error) { err = db.GetEngine(ctx). Asc("release_id", "name"). In("release_id", sortedRels.ID). - And("status = ?", db.FileStatusNormal). Find(&attachments) if err != nil { return err diff --git a/models/system/storage_cleanup.go b/models/system/storage_cleanup.go new file mode 100644 index 00000000000..3151b0a62ae --- /dev/null +++ b/models/system/storage_cleanup.go @@ -0,0 +1,42 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package system + +import ( + "context" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/modules/timeutil" +) + +const ( + PathFile = 1 // PathFile represents a file + PathDir = 2 // PathDir represents a directory +) + +// StoragePathDeletion represents a file or directory that is pending deletion. 
+type StoragePathDeletion struct { + ID int64 + StorageName string // storage name defined in the storage module + PathType int // 1 for file, 2 for directory + RelativePath string `xorm:"TEXT"` + DeleteFailedCount int `xorm:"DEFAULT 0 NOT NULL"` // Number of times the deletion failed, used to prevent infinite loop + LastDeleteFailedReason string `xorm:"TEXT"` // Error message of the last failed deletion attempt + LastDeleteFailedTime timeutil.TimeStamp // Last time the deletion failed, used to prevent infinite loop + CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` +} + +func init() { + db.RegisterModel(new(StoragePathDeletion)) +} + +func UpdateDeletionFailure(ctx context.Context, deletion *StoragePathDeletion, err error) error { + deletion.DeleteFailedCount++ + _, updateErr := db.GetEngine(ctx).Table("storage_path_deletion").ID(deletion.ID).Update(map[string]any{ + "delete_failed_count": deletion.DeleteFailedCount, + "last_delete_failed_reason": err.Error(), + "last_delete_failed_time": timeutil.TimeStampNow(), + }) + return updateErr +} diff --git a/models/user/main_test.go b/models/user/main_test.go index 2ca502bbeaa..db60a281465 100644 --- a/models/user/main_test.go +++ b/models/user/main_test.go @@ -8,7 +8,7 @@ import ( "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/services/attachment" + "code.gitea.io/gitea/services/storagecleanup" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -20,7 +20,7 @@ func TestMain(m *testing.M) { unittest.MainTest(m, &unittest.TestOptions{ SetUp: func() error { setting.LoadQueueSettings() - return attachment.Init() + return storagecleanup.Init() }, }) } diff --git a/modules/storage/storage.go b/modules/storage/storage.go index 1868817c057..c017838be1b 100644 --- a/modules/storage/storage.go +++ b/modules/storage/storage.go @@ -166,6 +166,40 @@ func NewStorage(typStr Type, cfg *setting.Storage) (ObjectStorage, error) { return 
fn(context.Background(), cfg) } +const ( + AttachmentStorageName = "attachment" + AvatarStorageName = "avatar" + RepoAvatarStorageName = "repo_avatar" + LFSStorageName = "lfs" + RepoArchiveStorageName = "repo_archive" + PackagesStorageName = "packages" + ActionsLogStorageName = "actions_logs" + ActionsArtifactsStorageName = "actions_artifacts" +) + +func GetStorageByName(name string) (ObjectStorage, error) { + switch name { + case AttachmentStorageName: + return Attachments, nil + case AvatarStorageName: + return Avatars, nil + case RepoAvatarStorageName: + return RepoAvatars, nil + case LFSStorageName: + return LFS, nil + case RepoArchiveStorageName: + return RepoArchives, nil + case PackagesStorageName: + return Packages, nil + case ActionsLogStorageName: + return Actions, nil + case ActionsArtifactsStorageName: + return ActionsArtifacts, nil + default: + return nil, fmt.Errorf("Unknown storage name: %s", name) + } +} + func initAvatars() (err error) { log.Info("Initialising Avatar storage with type: %s", setting.Avatar.Storage.Type) Avatars, err = NewStorage(setting.Avatar.Storage.Type, setting.Avatar.Storage) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index ab925669a56..e11a8477fe5 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3065,7 +3065,7 @@ dashboard.sync_branch.started = Branches Sync started dashboard.sync_tag.started = Tags Sync started dashboard.rebuild_issue_indexer = Rebuild issue indexer dashboard.sync_repo_licenses = Sync repo licenses -dashboard.clean_attachments = Clean up deleted attachments +dashboard.cleanup_storage = Clean up deleted storage files users.user_manage_panel = User Account Management users.new_account = Create User Account diff --git a/routers/init.go b/routers/init.go index b8bcd937bf9..39c34bf3a37 100644 --- a/routers/init.go +++ b/routers/init.go @@ -36,7 +36,6 @@ import ( web_routers "code.gitea.io/gitea/routers/web" actions_service 
"code.gitea.io/gitea/services/actions" asymkey_service "code.gitea.io/gitea/services/asymkey" - attachment_service "code.gitea.io/gitea/services/attachment" "code.gitea.io/gitea/services/auth" "code.gitea.io/gitea/services/auth/source/oauth2" "code.gitea.io/gitea/services/automerge" @@ -53,6 +52,7 @@ import ( release_service "code.gitea.io/gitea/services/release" repo_service "code.gitea.io/gitea/services/repository" "code.gitea.io/gitea/services/repository/archiver" + "code.gitea.io/gitea/services/storagecleanup" "code.gitea.io/gitea/services/task" "code.gitea.io/gitea/services/uinotification" "code.gitea.io/gitea/services/webhook" @@ -175,7 +175,7 @@ func InitWebInstalled(ctx context.Context) { mustInitCtx(ctx, actions_service.Init) mustInit(repo_service.InitLicenseClassifier) - mustInit(attachment_service.Init) + mustInit(storagecleanup.Init) // Finally start up the cron cron.NewContext(ctx) diff --git a/services/attachment/attachment.go b/services/attachment/attachment.go index f14daf20e8e..65ff3629ec3 100644 --- a/services/attachment/attachment.go +++ b/services/attachment/attachment.go @@ -6,20 +6,15 @@ package attachment import ( "bytes" "context" - "errors" "fmt" "io" - "os" "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" - "code.gitea.io/gitea/models/system" - "code.gitea.io/gitea/modules/graceful" - "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/queue" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/services/context/upload" + "code.gitea.io/gitea/services/storagecleanup" "github.com/google/uuid" ) @@ -37,7 +32,6 @@ func NewAttachment(ctx context.Context, attach *repo_model.Attachment, file io.R return fmt.Errorf("Create: %w", err) } attach.Size = size - attach.Status = db.FileStatusNormal return db.Insert(ctx, attach) }) @@ -75,109 +69,12 @@ func DeleteAttachment(ctx context.Context, a *repo_model.Attachment) error { // DeleteAttachments deletes the given 
attachments and optionally the associated files. func DeleteAttachments(ctx context.Context, attachments []*repo_model.Attachment) (int, error) { - cnt, err := repo_model.MarkAttachmentsDeleted(ctx, attachments) + deletions, err := repo_model.DeleteAttachments(ctx, attachments) if err != nil { return 0, err } - AddAttachmentsToCleanQueue(ctx, attachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, deletions) - return int(cnt), nil -} - -var cleanQueue *queue.WorkerPoolQueue[int64] - -func Init() error { - cleanQueue = queue.CreateSimpleQueue(graceful.GetManager().ShutdownContext(), "attachments-clean", handler) - if cleanQueue == nil { - return errors.New("Unable to create attachments-clean queue") - } - go graceful.GetManager().RunWithCancel(cleanQueue) - return nil -} - -// AddAttachmentsToCleanQueue adds the attachments to the clean queue for deletion. -func AddAttachmentsToCleanQueue(ctx context.Context, attachments []*repo_model.Attachment) { - for _, a := range attachments { - if err := cleanQueue.Push(a.ID); err != nil { - log.Error("Failed to push attachment ID %d to clean queue: %v", a.ID, err) - continue - } - } -} - -func handler(attachmentIDs ...int64) []int64 { - return cleanAttachments(graceful.GetManager().ShutdownContext(), attachmentIDs) -} - -func cleanAttachments(ctx context.Context, attachmentIDs []int64) []int64 { - var failed []int64 - for _, attachmentID := range attachmentIDs { - attachment, exist, err := db.GetByID[repo_model.Attachment](ctx, attachmentID) - if err != nil { - log.Error("Failed to get attachment by ID %d: %v", attachmentID, err) - continue - } - if !exist { - continue - } - if attachment.Status != db.FileStatusToBeDeleted { - log.Trace("Attachment %s is not marked for deletion, skipping", attachment.RelativePath()) - continue - } - - if err := storage.Attachments.Delete(attachment.RelativePath()); err != nil { - if !errors.Is(err, os.ErrNotExist) { - log.Error("delete attachment[uuid: %s] failed: %v", attachment.UUID, 
err) - failed = append(failed, attachment.ID) - if attachment.DeleteFailedCount%3 == 0 { - _ = system.CreateNotice(ctx, system.NoticeRepository, fmt.Sprintf("Failed to delete attachment %s (%d times): %v", attachment.RelativePath(), attachment.DeleteFailedCount+1, err)) - } - if err := repo_model.UpdateMarkedAttachmentFailure(ctx, attachment, err); err != nil { - log.Error("Failed to update attachment failure for ID %d: %v", attachment.ID, err) - } - continue - } - } - if err := repo_model.DeleteMarkedAttachmentByID(ctx, attachment.ID); err != nil { - log.Error("Failed to delete attachment by ID %d(will be tried later): %v", attachment.ID, err) - failed = append(failed, attachment.ID) - } else { - log.Trace("Attachment %s deleted from database", attachment.RelativePath()) - } - } - return failed -} - -// ScanToBeDeletedAttachments scans for attachments that are marked as to be deleted and send to -// clean queue -func ScanToBeDeletedAttachments(ctx context.Context) error { - attachmentIDs := make([]int64, 0, 100) - lastID := int64(0) - for { - if err := db.GetEngine(ctx). - Select("id"). - // use the status and id index to speed up the query - Where("status = ? AND id > ?", db.FileStatusToBeDeleted, lastID). - Asc("id"). - Limit(100). 
- Find(&attachmentIDs); err != nil { - return fmt.Errorf("scan to-be-deleted attachments: %w", err) - } - - if len(attachmentIDs) == 0 { - log.Trace("No more attachments to be deleted") - break - } - for _, id := range attachmentIDs { - if err := cleanQueue.Push(id); err != nil { - log.Error("Failed to push attachment ID %d to clean queue: %v", id, err) - } - } - - lastID = attachmentIDs[len(attachmentIDs)-1] - attachmentIDs = attachmentIDs[0:0] - } - - return nil + return len(deletions), nil } diff --git a/services/attachment/attachment_test.go b/services/attachment/attachment_test.go index f4e178c3623..937d4cba7d8 100644 --- a/services/attachment/attachment_test.go +++ b/services/attachment/attachment_test.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/models/unittest" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/services/storagecleanup" _ "code.gitea.io/gitea/models/actions" @@ -23,7 +24,7 @@ func TestMain(m *testing.M) { unittest.MainTest(m, &unittest.TestOptions{ SetUp: func() error { setting.LoadQueueSettings() - return Init() + return storagecleanup.Init() }, }) } diff --git a/services/cron/tasks_extended.go b/services/cron/tasks_extended.go index f9383a3093f..4252d98b402 100644 --- a/services/cron/tasks_extended.go +++ b/services/cron/tasks_extended.go @@ -15,9 +15,9 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/updatechecker" asymkey_service "code.gitea.io/gitea/services/asymkey" - attachment_service "code.gitea.io/gitea/services/attachment" repo_service "code.gitea.io/gitea/services/repository" archiver_service "code.gitea.io/gitea/services/repository/archiver" + "code.gitea.io/gitea/services/storagecleanup" user_service "code.gitea.io/gitea/services/user" ) @@ -224,13 +224,13 @@ func registerRebuildIssueIndexer() { }) } -func registerCleanAttachments() { - RegisterTaskFatal("clean_attachments", &BaseConfig{ +func registerCleanStorage() { + 
RegisterTaskFatal("cleanup_storage", &BaseConfig{ Enabled: false, RunAtStart: false, Schedule: "@every 24h", }, func(ctx context.Context, _ *user_model.User, _ Config) error { - return attachment_service.ScanToBeDeletedAttachments(ctx) + return storagecleanup.ScanToBeDeletedFilesOrDir(ctx) }) } @@ -249,5 +249,5 @@ func initExtendedTasks() { registerDeleteOldSystemNotices() registerGCLFS() registerRebuildIssueIndexer() - registerCleanAttachments() + registerCleanStorage() } diff --git a/services/doctor/repository.go b/services/doctor/repository.go index 4a8d00b5716..359c4a17e0d 100644 --- a/services/doctor/repository.go +++ b/services/doctor/repository.go @@ -36,6 +36,7 @@ func deleteOrphanedRepos(ctx context.Context) (int64, error) { } batchSize := db.MaxBatchInsertSize("repository") + e := db.GetEngine(ctx) var deleted int64 for { @@ -44,7 +45,7 @@ func deleteOrphanedRepos(ctx context.Context) (int64, error) { return deleted, ctx.Err() default: var ids []int64 - if err := db.GetEngine(ctx).Table("`repository`"). + if err := e.Table("`repository`"). Join("LEFT", "`user`", "repository.owner_id=`user`.id"). Where(builder.IsNull{"`user`.id"}). Select("`repository`.id").Limit(batchSize).Find(&ids); err != nil { diff --git a/services/issue/comments.go b/services/issue/comments.go index 153d2ebbd60..cfcfe78dafa 100644 --- a/services/issue/comments.go +++ b/services/issue/comments.go @@ -16,9 +16,9 @@ import ( "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/services/attachment" git_service "code.gitea.io/gitea/services/git" notify_service "code.gitea.io/gitea/services/notify" + "code.gitea.io/gitea/services/storagecleanup" ) // CreateRefComment creates a commit reference comment to issue. 
@@ -132,36 +132,35 @@ func UpdateComment(ctx context.Context, c *issues_model.Comment, contentVersion } // deleteComment deletes the comment -func deleteComment(ctx context.Context, comment *issues_model.Comment, removeAttachments bool) error { - return db.WithTx(ctx, func(ctx context.Context) error { +func deleteComment(ctx context.Context, comment *issues_model.Comment, removeAttachments bool) ([]int64, error) { + return db.WithTx2(ctx, func(ctx context.Context) ([]int64, error) { if removeAttachments { // load attachments before deleting the comment if err := comment.LoadAttachments(ctx); err != nil { - return err + return nil, err } } // deletedReviewComment should be a review comment with no content and no attachments if err := issues_model.DeleteComment(ctx, comment); err != nil { - return err + return nil, err } if removeAttachments { // mark comment attachments as deleted - if _, err := repo_model.MarkAttachmentsDeleted(ctx, comment.Attachments); err != nil { - return err - } + return repo_model.DeleteAttachments(ctx, comment.Attachments) } - return nil + return nil, nil }) } func DeleteComment(ctx context.Context, doer *user_model.User, comment *issues_model.Comment) error { - if err := deleteComment(ctx, comment, true); err != nil { + deletions, err := deleteComment(ctx, comment, true) + if err != nil { return err } - attachment.AddAttachmentsToCleanQueue(ctx, comment.Attachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, deletions) notify_service.DeleteComment(ctx, doer, comment) diff --git a/services/issue/issue.go b/services/issue/issue.go index 9b3b0c66b21..c0f61859805 100644 --- a/services/issue/issue.go +++ b/services/issue/issue.go @@ -17,8 +17,8 @@ import ( "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" - attachment_service "code.gitea.io/gitea/services/attachment" notify_service "code.gitea.io/gitea/services/notify" + "code.gitea.io/gitea/services/storagecleanup" ) // NewIssue 
creates new issue with labels for repository. @@ -189,12 +189,12 @@ func DeleteIssue(ctx context.Context, doer *user_model.User, gitRepo *git.Reposi } // delete entries in database - toBeCleanedAttachments, err := deleteIssue(ctx, issue, true) + toBeCleanedDeletions, err := deleteIssue(ctx, issue, true) if err != nil { return err } - attachment_service.AddAttachmentsToCleanQueue(ctx, toBeCleanedAttachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, toBeCleanedDeletions) // delete pull request related git data if issue.IsPull && gitRepo != nil { @@ -258,9 +258,9 @@ func GetRefEndNamesAndURLs(issues []*issues_model.Issue, repoLink string) (map[i } // deleteIssue deletes the issue -func deleteIssue(ctx context.Context, issue *issues_model.Issue, deleteAttachments bool) ([]*repo_model.Attachment, error) { - return db.WithTx2(ctx, func(ctx context.Context) ([]*repo_model.Attachment, error) { - toBeCleanedAttachments := make([]*repo_model.Attachment, 0) +func deleteIssue(ctx context.Context, issue *issues_model.Issue, deleteAttachments bool) ([]int64, error) { + return db.WithTx2(ctx, func(ctx context.Context) ([]int64, error) { + toBeCleanedDeletions := make([]int64, 0) if _, err := db.GetEngine(ctx).ID(issue.ID).NoAutoCondition().Delete(issue); err != nil { return nil, err } @@ -315,11 +315,12 @@ func deleteIssue(ctx context.Context, issue *issues_model.Issue, deleteAttachmen } for _, comment := range issue.Comments { - if err := deleteComment(ctx, comment, deleteAttachments); err != nil { + deletions, err := deleteComment(ctx, comment, deleteAttachments) + if err != nil { return nil, fmt.Errorf("deleteComment [comment_id: %d]: %w", comment.ID, err) } if deleteAttachments { - toBeCleanedAttachments = append(toBeCleanedAttachments, comment.Attachments...) + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) 
} } @@ -328,41 +329,42 @@ func deleteIssue(ctx context.Context, issue *issues_model.Issue, deleteAttachmen if err := issue.LoadAttachments(ctx); err != nil { return nil, err } - if _, err := repo_model.MarkAttachmentsDeleted(ctx, issue.Attachments); err != nil { + deletions, err := repo_model.DeleteAttachments(ctx, issue.Attachments) + if err != nil { return nil, err } - toBeCleanedAttachments = append(toBeCleanedAttachments, issue.Attachments...) + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) } - return toBeCleanedAttachments, nil + return toBeCleanedDeletions, nil }) } // DeleteOrphanedIssues delete issues without a repo func DeleteOrphanedIssues(ctx context.Context) error { - toBeCleanedAttachments := make([]*repo_model.Attachment, 0) + toBeCleanedDeletions := make([]int64, 0) if err := db.WithTx(ctx, func(ctx context.Context) error { repoIDs, err := issues_model.GetOrphanedIssueRepoIDs(ctx) if err != nil { return err } for i := range repoIDs { - toBeCleanedIssueAttachments, err := DeleteIssuesByRepoID(ctx, repoIDs[i], true) + deletions, err := DeleteIssuesByRepoID(ctx, repoIDs[i], true) if err != nil { return err } - toBeCleanedAttachments = append(toBeCleanedAttachments, toBeCleanedIssueAttachments...) + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) 
} return nil }); err != nil { return err } - attachment_service.AddAttachmentsToCleanQueue(ctx, toBeCleanedAttachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, toBeCleanedDeletions) return nil } // DeleteIssuesByRepoID deletes issues by repositories id -func DeleteIssuesByRepoID(ctx context.Context, repoID int64, deleteAttachments bool) ([]*repo_model.Attachment, error) { - toBeCleanedAttachments := make([]*repo_model.Attachment, 0) +func DeleteIssuesByRepoID(ctx context.Context, repoID int64, deleteAttachments bool) ([]int64, error) { + toBeCleanedDeletions := make([]int64, 0) for { issues := make([]*issues_model.Issue, 0, db.DefaultMaxInSize) if err := db.GetEngine(ctx). @@ -378,13 +380,13 @@ func DeleteIssuesByRepoID(ctx context.Context, repoID int64, deleteAttachments b } for _, issue := range issues { - toBeCleanedIssueAttachments, err := deleteIssue(ctx, issue, deleteAttachments) + deletions, err := deleteIssue(ctx, issue, deleteAttachments) if err != nil { return nil, fmt.Errorf("deleteIssue [issue_id: %d]: %w", issue.ID, err) } - toBeCleanedAttachments = append(toBeCleanedAttachments, toBeCleanedIssueAttachments...) + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) 
} } - return toBeCleanedAttachments, nil + return toBeCleanedDeletions, nil } diff --git a/services/issue/issue_test.go b/services/issue/issue_test.go index 5bf8426d7b9..780f25ad009 100644 --- a/services/issue/issue_test.go +++ b/services/issue/issue_test.go @@ -11,7 +11,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/unittest" user_model "code.gitea.io/gitea/models/user" - attachment_service "code.gitea.io/gitea/services/attachment" + "code.gitea.io/gitea/services/storagecleanup" "github.com/stretchr/testify/assert" ) @@ -45,9 +45,9 @@ func TestIssue_DeleteIssue(t *testing.T) { ID: issueIDs[2], } - toBeCleanedAttachments, err := deleteIssue(db.DefaultContext, issue, true) + toBeCleanedDeletions, err := deleteIssue(db.DefaultContext, issue, true) assert.NoError(t, err) - attachment_service.AddAttachmentsToCleanQueue(db.DefaultContext, toBeCleanedAttachments) + storagecleanup.AddDeletionsToCleanQueue(db.DefaultContext, toBeCleanedDeletions) issueIDs, err = issues_model.GetIssueIDsByRepoID(db.DefaultContext, 1) assert.NoError(t, err) assert.Len(t, issueIDs, 4) @@ -57,9 +57,9 @@ func TestIssue_DeleteIssue(t *testing.T) { assert.NoError(t, err) issue, err = issues_model.GetIssueByID(db.DefaultContext, 4) assert.NoError(t, err) - toBeCleanedAttachments, err = deleteIssue(db.DefaultContext, issue, true) + toBeCleanedDeletions, err = deleteIssue(db.DefaultContext, issue, true) assert.NoError(t, err) - attachment_service.AddAttachmentsToCleanQueue(db.DefaultContext, toBeCleanedAttachments) + storagecleanup.AddDeletionsToCleanQueue(db.DefaultContext, toBeCleanedDeletions) assert.Len(t, attachments, 2) for i := range attachments { attachment, err := repo_model.GetAttachmentByUUID(db.DefaultContext, attachments[i].UUID) @@ -81,9 +81,9 @@ func TestIssue_DeleteIssue(t *testing.T) { assert.NoError(t, err) assert.False(t, left) - toBeCleanedAttachments, err = deleteIssue(db.DefaultContext, issue2, true) + toBeCleanedDeletions, err = 
deleteIssue(db.DefaultContext, issue2, true) assert.NoError(t, err) - attachment_service.AddAttachmentsToCleanQueue(db.DefaultContext, toBeCleanedAttachments) + storagecleanup.AddDeletionsToCleanQueue(db.DefaultContext, toBeCleanedDeletions) left, err = issues_model.IssueNoDependenciesLeft(db.DefaultContext, issue1) assert.NoError(t, err) assert.True(t, left) diff --git a/services/issue/main_test.go b/services/issue/main_test.go index 544cf2aad7e..2a57a09a879 100644 --- a/services/issue/main_test.go +++ b/services/issue/main_test.go @@ -8,7 +8,7 @@ import ( "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/services/attachment" + "code.gitea.io/gitea/services/storagecleanup" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -18,7 +18,7 @@ func TestMain(m *testing.M) { unittest.MainTest(m, &unittest.TestOptions{ SetUp: func() error { setting.LoadQueueSettings() - return attachment.Init() + return storagecleanup.Init() }, }) } diff --git a/services/migrations/gitea_uploader.go b/services/migrations/gitea_uploader.go index 076e494b4c3..75eb06d01fa 100644 --- a/services/migrations/gitea_uploader.go +++ b/services/migrations/gitea_uploader.go @@ -323,7 +323,6 @@ func (g *GiteaLocalUploader) CreateReleases(ctx context.Context, releases ...*ba DownloadCount: int64(*asset.DownloadCount), Size: int64(*asset.Size), CreatedUnix: timeutil.TimeStamp(asset.Created.Unix()), - Status: db.FileStatusNormal, } // SECURITY: We cannot check the DownloadURL and DownloadFunc are safe here diff --git a/services/release/release.go b/services/release/release.go index 417b16ce51e..f6502a30767 100644 --- a/services/release/release.go +++ b/services/release/release.go @@ -21,8 +21,8 @@ import ( "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" - attachment_service "code.gitea.io/gitea/services/attachment" notify_service "code.gitea.io/gitea/services/notify" + 
"code.gitea.io/gitea/services/storagecleanup" ) // ErrInvalidTagName represents a "InvalidTagName" kind of error. @@ -289,6 +289,7 @@ func UpdateRelease(ctx context.Context, doer *user_model.User, gitRepo *git.Repo deletedUUIDs := make(container.Set[string]) deletedAttachments := make([]*repo_model.Attachment, 0, len(delAttachmentUUIDs)) + toBeCleanedDeletions := make([]int64, 0, len(delAttachmentUUIDs)) if len(delAttachmentUUIDs) > 0 { // Check attachments attachments, err := repo_model.GetAttachmentsByUUIDs(ctx, delAttachmentUUIDs) @@ -303,9 +304,11 @@ func UpdateRelease(ctx context.Context, doer *user_model.User, gitRepo *git.Repo deletedAttachments = append(deletedAttachments, attach) } - if _, err := repo_model.MarkAttachmentsDeleted(ctx, deletedAttachments); err != nil { + deletions, err := repo_model.DeleteAttachments(ctx, deletedAttachments) + if err != nil { return fmt.Errorf("DeleteAttachments [uuids: %v]: %w", deletedUUIDs.Values(), err) } + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) // files will be deleted after database transaction is committed successfully } @@ -341,7 +344,7 @@ func UpdateRelease(ctx context.Context, doer *user_model.User, gitRepo *git.Repo return err } - attachment_service.AddAttachmentsToCleanQueue(ctx, deletedAttachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, toBeCleanedDeletions) if !rel.IsDraft { if !isTagCreated && !isConvertedFromTag { @@ -355,6 +358,7 @@ func UpdateRelease(ctx context.Context, doer *user_model.User, gitRepo *git.Repo // DeleteReleaseByID deletes a release and corresponding Git tag by given ID. 
func DeleteReleaseByID(ctx context.Context, repo *repo_model.Repository, rel *repo_model.Release, doer *user_model.User, delTag bool) error { + var toBeCleanedDeletions []int64 if err := db.WithTx(ctx, func(ctx context.Context) error { if delTag { protectedTags, err := git_model.GetProtectedTags(ctx, rel.RepoID) @@ -404,15 +408,17 @@ func DeleteReleaseByID(ctx context.Context, repo *repo_model.Repository, rel *re return fmt.Errorf("LoadAttributes: %w", err) } - if err := repo_model.MarkAttachmentsDeletedByRelease(ctx, rel.ID); err != nil { + deletions, err := repo_model.DeleteAttachments(ctx, rel.Attachments) + if err != nil { return fmt.Errorf("DeleteAttachments: %w", err) } + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) return nil }); err != nil { return err } - attachment_service.AddAttachmentsToCleanQueue(ctx, rel.Attachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, toBeCleanedDeletions) if !rel.IsDraft { notify_service.DeleteRelease(ctx, doer, rel) diff --git a/services/release/release_test.go b/services/release/release_test.go index 50da93446a3..69b8384d1e3 100644 --- a/services/release/release_test.go +++ b/services/release/release_test.go @@ -16,6 +16,7 @@ import ( "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/services/attachment" + "code.gitea.io/gitea/services/storagecleanup" _ "code.gitea.io/gitea/models/actions" @@ -26,7 +27,7 @@ func TestMain(m *testing.M) { unittest.MainTest(m, &unittest.TestOptions{ SetUp: func() error { setting.LoadQueueSettings() - return attachment.Init() + return storagecleanup.Init() }, }) } diff --git a/services/repository/delete.go b/services/repository/delete.go index 5647843e4a7..ea281c44208 100644 --- a/services/repository/delete.go +++ b/services/repository/delete.go @@ -29,8 +29,8 @@ import ( "code.gitea.io/gitea/modules/storage" actions_service "code.gitea.io/gitea/services/actions" asymkey_service "code.gitea.io/gitea/services/asymkey" - 
attachment_service "code.gitea.io/gitea/services/attachment" issue_service "code.gitea.io/gitea/services/issue" + "code.gitea.io/gitea/services/storagecleanup" "xorm.io/builder" ) @@ -76,10 +76,9 @@ func DeleteRepositoryDirectly(ctx context.Context, repoID int64, ignoreOrgTeams } var needRewriteKeysFile bool - releaseAttachments := make([]*repo_model.Attachment, 0, 20) - var repoAttachments []*repo_model.Attachment var archivePaths []string var lfsPaths []string + toBeCleanedDeletions := make([]int64, 0, 20) err = db.WithTx(ctx, func(ctx context.Context) error { // In case owner is a organization, we have to change repo specific teams @@ -116,6 +115,7 @@ func DeleteRepositoryDirectly(ctx context.Context, repoID int64, ignoreOrgTeams } } + releaseAttachments := make([]*repo_model.Attachment, 0, 20) // some attachments have release_id but repo_id = 0 if err = db.GetEngine(ctx).Join("INNER", "`release`", "`release`.id = `attachment`.release_id"). Where("`release`.repo_id = ?", repoID). @@ -123,9 +123,11 @@ func DeleteRepositoryDirectly(ctx context.Context, repoID int64, ignoreOrgTeams return err } - if _, err := repo_model.MarkAttachmentsDeleted(ctx, releaseAttachments); err != nil { + deletions, err := repo_model.DeleteAttachments(ctx, releaseAttachments) + if err != nil { return fmt.Errorf("delete release attachments: %w", err) } + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) if _, err := db.Exec(ctx, "UPDATE `user` SET num_stars=num_stars-1 WHERE id IN (SELECT `uid` FROM `star` WHERE repo_id = ?)", repo.ID); err != nil { return err @@ -268,15 +270,18 @@ func DeleteRepositoryDirectly(ctx context.Context, repoID int64, ignoreOrgTeams } } + var repoAttachments []*repo_model.Attachment // Get all attachments with repo_id = repo.ID. 
some release attachments have repo_id = 0 should be deleted before if err := db.GetEngine(ctx).Where(builder.Eq{ "repo_id": repo.ID, }).Find(&repoAttachments); err != nil { return err } - if _, err := repo_model.MarkAttachmentsDeleted(ctx, repoAttachments); err != nil { + deletions, err = repo_model.DeleteAttachments(ctx, repoAttachments) + if err != nil { return err } + toBeCleanedDeletions = append(toBeCleanedDeletions, deletions...) // unlink packages linked to this repository return packages_model.UnlinkRepositoryFromAllPackages(ctx, repoID) @@ -318,8 +323,7 @@ func DeleteRepositoryDirectly(ctx context.Context, repoID int64, ignoreOrgTeams system_model.RemoveStorageWithNotice(ctx, storage.LFS, "Delete orphaned LFS file", lfsObj) } - attachment_service.AddAttachmentsToCleanQueue(ctx, releaseAttachments) - attachment_service.AddAttachmentsToCleanQueue(ctx, repoAttachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, toBeCleanedDeletions) if len(repo.Avatar) > 0 { if err := storage.RepoAvatars.Delete(repo.CustomAvatarRelativePath()); err != nil { diff --git a/services/repository/main_test.go b/services/repository/main_test.go index 01d04cc10d4..a3a9e8774ec 100644 --- a/services/repository/main_test.go +++ b/services/repository/main_test.go @@ -8,14 +8,14 @@ import ( "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/services/attachment" + "code.gitea.io/gitea/services/storagecleanup" ) func TestMain(m *testing.M) { unittest.MainTest(m, &unittest.TestOptions{ SetUp: func() error { setting.LoadQueueSettings() - return attachment.Init() + return storagecleanup.Init() }, }) } diff --git a/services/storagecleanup/storagecleanup.go b/services/storagecleanup/storagecleanup.go new file mode 100644 index 00000000000..c9419b826fe --- /dev/null +++ b/services/storagecleanup/storagecleanup.go @@ -0,0 +1,116 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package storagecleanup + +import ( + "context" + "errors" + "fmt" + "os" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/system" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/queue" + "code.gitea.io/gitea/modules/storage" +) + +var cleanQueue *queue.WorkerPoolQueue[int64] + +func Init() error { + cleanQueue = queue.CreateSimpleQueue(graceful.GetManager().ShutdownContext(), "storage-cleanup", handler) + if cleanQueue == nil { + return errors.New("Unable to create storage-cleanup queue") + } + go graceful.GetManager().RunWithCancel(cleanQueue) + return nil +} + +// AddDeletionsToCleanQueue pushes the given pending storage deletion IDs to the clean queue. +func AddDeletionsToCleanQueue(ctx context.Context, deletionIDs []int64) { + for _, id := range deletionIDs { + if err := cleanQueue.Push(id); err != nil { + log.Error("Failed to push deletion ID %d to clean queue: %v", id, err) + continue + } + } +} + +func handler(deletionIDs ...int64) []int64 { + return cleanupDeletions(graceful.GetManager().ShutdownContext(), deletionIDs) +} + +func cleanupDeletions(ctx context.Context, deletionIDs []int64) []int64 { + var failed []int64 + for _, deletionID := range deletionIDs { + deletion, exist, err := db.GetByID[system.StoragePathDeletion](ctx, deletionID) + if err != nil { + log.Error("Failed to get deletion by ID %d: %v", deletionID, err) + continue + } + if !exist { + continue + } + + theStorage, err := storage.GetStorageByName(deletion.StorageName) + if err != nil { + log.Error("Failed to get storage by name %s: %v", deletion.StorageName, err) + continue + } + if err := theStorage.Delete(deletion.RelativePath); err != nil { + if !errors.Is(err, os.ErrNotExist) { + log.Error("delete pending deletion[relative path: %s] failed: %v", deletion.RelativePath, err) + failed = append(failed, deletion.ID) + if deletion.DeleteFailedCount%3 == 0 { + _ = system.CreateNotice(ctx, 
system.NoticeRepository, fmt.Sprintf("Failed to delete pending deletion %s (%d times): %v", deletion.RelativePath, deletion.DeleteFailedCount+1, err)) + } + if err := system.UpdateDeletionFailure(ctx, deletion, err); err != nil { + log.Error("Failed to update deletion failure for ID %d: %v", deletion.ID, err) + } + continue + } + } + if _, err := db.DeleteByID[system.StoragePathDeletion](ctx, deletion.ID); err != nil { + log.Error("Failed to delete pending deletion by ID %d(will be tried later): %v", deletion.ID, err) + failed = append(failed, deletion.ID) + } else { + log.Trace("Pending deletion %s deleted from database", deletion.RelativePath) + } + } + return failed +} + +// ScanToBeDeletedFilesOrDir scans for files or directories that are marked to be deleted and sends their IDs to the +// clean queue +func ScanToBeDeletedFilesOrDir(ctx context.Context) error { + deletionIDs := make([]int64, 0, 100) + lastID := int64(0) + for { + if err := db.GetEngine(ctx). + Select("id"). + // page through the rows in ascending id order, resuming after lastID + Where("id > ?", lastID). + Asc("id"). + Limit(100). + Find(&deletionIDs); err != nil { + return fmt.Errorf("scan to-be-deleted files or directories: %w", err) + } + + if len(deletionIDs) == 0 { + log.Trace("No more files or directories to be deleted") + break + } + for _, id := range deletionIDs { + if err := cleanQueue.Push(id); err != nil { + log.Error("Failed to push deletion ID %d to clean queue: %v", id, err) + } + } + + lastID = deletionIDs[len(deletionIDs)-1] + deletionIDs = deletionIDs[0:0] + } + + return nil +} diff --git a/services/user/delete.go b/services/user/delete.go index fb8ea392f94..89a4a3e43ed 100644 --- a/services/user/delete.go +++ b/services/user/delete.go @@ -28,8 +28,8 @@ import ( ) // deleteUser deletes models associated to an user. 
-func deleteUser(ctx context.Context, u *user_model.User, purge bool) (toBeCleanedAttachments []*repo_model.Attachment, err error) { - toBeCleanedAttachments = make([]*repo_model.Attachment, 0) +func deleteUser(ctx context.Context, u *user_model.User, purge bool) (toBeCleanedDeletions []int64, err error) { + toBeCleanedDeletions = make([]int64, 0) // ***** START: Watch ***** watchedRepoIDs, err := db.FindIDs(ctx, "watch", "watch.repo_id", @@ -126,10 +126,11 @@ func deleteUser(ctx context.Context, u *user_model.User, purge bool) (toBeCleane return nil, err } - if _, err := repo_model.MarkAttachmentsDeleted(ctx, comment.Attachments); err != nil { + pendingDeletions, err := repo_model.DeleteAttachments(ctx, comment.Attachments) + if err != nil { return nil, err } - toBeCleanedAttachments = append(toBeCleanedAttachments, comment.Attachments...) + toBeCleanedDeletions = append(toBeCleanedDeletions, pendingDeletions...) } } @@ -207,5 +208,5 @@ func deleteUser(ctx context.Context, u *user_model.User, purge bool) (toBeCleane return nil, fmt.Errorf("delete: %w", err) } - return toBeCleanedAttachments, nil + return toBeCleanedDeletions, nil } diff --git a/services/user/user.go b/services/user/user.go index ef2d27fffd5..c9aab51ecd9 100644 --- a/services/user/user.go +++ b/services/user/user.go @@ -24,11 +24,11 @@ import ( "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/services/agit" asymkey_service "code.gitea.io/gitea/services/asymkey" - attachment_service "code.gitea.io/gitea/services/attachment" org_service "code.gitea.io/gitea/services/org" "code.gitea.io/gitea/services/packages" container_service "code.gitea.io/gitea/services/packages/container" repo_service "code.gitea.io/gitea/services/repository" + "code.gitea.io/gitea/services/storagecleanup" ) // RenameUser renames a user @@ -211,7 +211,7 @@ func DeleteUser(ctx context.Context, u *user_model.User, purge bool) error { } } - toBeCleanedAttachments, err := db.WithTx2(ctx, func(ctx context.Context) 
([]*repo_model.Attachment, error) { + toBeCleanedDeletions, err := db.WithTx2(ctx, func(ctx context.Context) ([]int64, error) { // Note: A user owns any repository or belongs to any organization // cannot perform delete operation. This causes a race with the purge above // however consistency requires that we ensure that this is the case @@ -239,17 +239,17 @@ func DeleteUser(ctx context.Context, u *user_model.User, purge bool) error { return nil, packages_model.ErrUserOwnPackages{UID: u.ID} } - toBeCleanedAttachments, err := deleteUser(ctx, u, purge) + toBeCleanedDeletions, err := deleteUser(ctx, u, purge) if err != nil { return nil, fmt.Errorf("DeleteUser: %w", err) } - return toBeCleanedAttachments, nil + return toBeCleanedDeletions, nil }) if err != nil { return err } - attachment_service.AddAttachmentsToCleanQueue(ctx, toBeCleanedAttachments) + storagecleanup.AddDeletionsToCleanQueue(ctx, toBeCleanedDeletions) if err = asymkey_service.RewriteAllPublicKeys(ctx); err != nil { return err diff --git a/services/user/user_test.go b/services/user/user_test.go index f687095135e..868cd08c4ea 100644 --- a/services/user/user_test.go +++ b/services/user/user_test.go @@ -17,8 +17,8 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/services/attachment" org_service "code.gitea.io/gitea/services/org" + "code.gitea.io/gitea/services/storagecleanup" "github.com/stretchr/testify/assert" ) @@ -27,7 +27,7 @@ func TestMain(m *testing.M) { unittest.MainTest(m, &unittest.TestOptions{ SetUp: func() error { setting.LoadQueueSettings() - return attachment.Init() + return storagecleanup.Init() }, }) }