Stream repo zip/tar.gz/bundle achives by default (#35487)

Initial implementation of linked proposal.

* Closes #29942
* Fix #34003
* Fix #30443

---------

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
ChristopherHX
2025-09-19 05:51:21 +02:00
committed by GitHub
parent 90cb5f9a1f
commit 9a0ec53ee3
6 changed files with 110 additions and 156 deletions

View File

@@ -54,6 +54,12 @@ var (
AllowForkWithoutMaximumLimit bool
AllowForkIntoSameOwner bool
// StreamArchives makes Gitea stream git archive files to the client directly instead of creating an archive first.
// Ideally all users should use this streaming method. However, at the moment we don't know whether there are
// any users who still need the old behavior, so we introduce this option, intentionally not documenting it.
// After one or two releases, if no one complains, we will remove this option and always use streaming.
StreamArchives bool
// Repository editor settings
Editor struct {
LineWrapExtensions []string
@@ -167,6 +173,7 @@ var (
DisableStars: false,
DefaultBranch: "main",
AllowForkWithoutMaximumLimit: true,
StreamArchives: true,
// Repository editor settings
Editor: struct {

View File

@@ -1247,7 +1247,7 @@ func Routes() *web.Router {
}, reqToken())
m.Get("/raw/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFile)
m.Get("/media/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFileOrLFS)
m.Methods("HEAD,GET", "/archive/*", reqRepoReader(unit.TypeCode), repo.GetArchive)
m.Methods("HEAD,GET", "/archive/*", reqRepoReader(unit.TypeCode), context.ReferencesGitRepo(true), repo.GetArchive)
m.Combo("/forks").Get(repo.ListForks).
Post(reqToken(), reqRepoReader(unit.TypeCode), bind(api.CreateForkOption{}), repo.CreateFork)
m.Post("/merge-upstream", reqToken(), mustNotBeArchived, reqRepoWriter(unit.TypeCode), bind(api.MergeUpstreamRequest{}), repo.MergeUpstream)
@@ -1466,7 +1466,7 @@ func Routes() *web.Router {
m.Delete("", repo.DeleteAvatar)
}, reqAdmin(), reqToken())
m.Methods("HEAD,GET", "/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), repo.DownloadArchive)
m.Methods("HEAD,GET", "/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), context.ReferencesGitRepo(true), repo.DownloadArchive)
}, repoAssignment(), checkTokenPublicOnly())
}, tokenRequiresScopes(auth_model.AccessTokenScopeCategoryRepository))

View File

@@ -4,14 +4,29 @@
package repo
import (
"errors"
"net/http"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/services/context"
archiver_service "code.gitea.io/gitea/services/repository/archiver"
)
func serveRepoArchive(ctx *context.APIContext, reqFileName string) {
aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, reqFileName)
if err != nil {
if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
ctx.APIError(http.StatusBadRequest, err)
} else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) {
ctx.APIError(http.StatusNotFound, err)
} else {
ctx.APIErrorInternal(err)
}
return
}
archiver_service.ServeRepoArchive(ctx.Base, ctx.Repo.Repository, ctx.Repo.GitRepo, aReq)
}
func DownloadArchive(ctx *context.APIContext) {
var tp git.ArchiveType
switch ballType := ctx.PathParam("ball_type"); ballType {
@@ -25,27 +40,5 @@ func DownloadArchive(ctx *context.APIContext) {
ctx.APIError(http.StatusBadRequest, "Unknown archive type: "+ballType)
return
}
if ctx.Repo.GitRepo == nil {
var err error
ctx.Repo.GitRepo, err = gitrepo.RepositoryFromRequestContextOrOpen(ctx, ctx.Repo.Repository)
if err != nil {
ctx.APIErrorInternal(err)
return
}
}
r, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*")+"."+tp.String())
if err != nil {
ctx.APIErrorInternal(err)
return
}
archive, err := r.Await(ctx)
if err != nil {
ctx.APIErrorInternal(err)
return
}
download(ctx, r.GetArchiveName(), archive)
serveRepoArchive(ctx, ctx.PathParam("*")+"."+tp.String())
}

View File

@@ -15,9 +15,7 @@ import (
"time"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/httpcache"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/lfs"
@@ -31,7 +29,6 @@ import (
"code.gitea.io/gitea/routers/common"
"code.gitea.io/gitea/services/context"
pull_service "code.gitea.io/gitea/services/pull"
archiver_service "code.gitea.io/gitea/services/repository/archiver"
files_service "code.gitea.io/gitea/services/repository/files"
)
@@ -282,74 +279,7 @@ func GetArchive(ctx *context.APIContext) {
// "404":
// "$ref": "#/responses/notFound"
if ctx.Repo.GitRepo == nil {
var err error
ctx.Repo.GitRepo, err = gitrepo.RepositoryFromRequestContextOrOpen(ctx, ctx.Repo.Repository)
if err != nil {
ctx.APIErrorInternal(err)
return
}
}
archiveDownload(ctx)
}
func archiveDownload(ctx *context.APIContext) {
aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*"))
if err != nil {
if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) {
ctx.APIError(http.StatusBadRequest, err)
} else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) {
ctx.APIError(http.StatusNotFound, err)
} else {
ctx.APIErrorInternal(err)
}
return
}
archiver, err := aReq.Await(ctx)
if err != nil {
ctx.APIErrorInternal(err)
return
}
download(ctx, aReq.GetArchiveName(), archiver)
}
func download(ctx *context.APIContext, archiveName string, archiver *repo_model.RepoArchiver) {
downloadName := ctx.Repo.Repository.Name + "-" + archiveName
// Add nix format link header so tarballs lock correctly:
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
ctx.Repo.Repository.APIURL(),
archiver.CommitID,
archiver.Type.String(),
archiver.CommitID,
))
rPath := archiver.RelativePath()
if setting.RepoArchive.Storage.ServeDirect() {
// If we have a signed url (S3, object storage), redirect to this directly.
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
if u != nil && err == nil {
ctx.Redirect(u.String())
return
}
}
// If we have matched and access to release or issue
fr, err := storage.RepoArchives.Open(rPath)
if err != nil {
ctx.APIErrorInternal(err)
return
}
defer fr.Close()
ctx.ServeContent(fr, &context.ServeHeaderOptions{
Filename: downloadName,
LastModified: archiver.CreatedUnix.AsLocalTime(),
})
serveRepoArchive(ctx, ctx.PathParam("*"))
}
// GetEditorconfig get editor config of a repository

View File

@@ -24,7 +24,6 @@ import (
"code.gitea.io/gitea/modules/optional"
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/util"
@@ -376,53 +375,19 @@ func Download(ctx *context.Context) {
}
return
}
archiver, err := aReq.Await(ctx)
if err != nil {
ctx.ServerError("archiver.Await", err)
return
}
download(ctx, aReq.GetArchiveName(), archiver)
}
func download(ctx *context.Context, archiveName string, archiver *repo_model.RepoArchiver) {
downloadName := ctx.Repo.Repository.Name + "-" + archiveName
// Add nix format link header so tarballs lock correctly:
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.tar.gz?rev=%s>; rel="immutable"`,
ctx.Repo.Repository.APIURL(),
archiver.CommitID, archiver.CommitID))
rPath := archiver.RelativePath()
if setting.RepoArchive.Storage.ServeDirect() {
// If we have a signed url (S3, object storage), redirect to this directly.
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
if u != nil && err == nil {
ctx.Redirect(u.String())
return
}
}
// If we have matched and access to release or issue
fr, err := storage.RepoArchives.Open(rPath)
if err != nil {
ctx.ServerError("Open", err)
return
}
defer fr.Close()
ctx.ServeContent(fr, &context.ServeHeaderOptions{
Filename: downloadName,
LastModified: archiver.CreatedUnix.AsLocalTime(),
})
archiver_service.ServeRepoArchive(ctx.Base, ctx.Repo.Repository, ctx.Repo.GitRepo, aReq)
}
// InitiateDownload will enqueue an archival request, as needed. It may submit
// a request that's already in-progress, but the archiver service will just
// kind of drop it on the floor if this is the case.
func InitiateDownload(ctx *context.Context) {
if setting.Repository.StreamArchives {
ctx.JSON(http.StatusOK, map[string]any{
"complete": true,
})
return
}
aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*"))
if err != nil {
ctx.HTTPError(http.StatusBadRequest, "invalid archive request")

View File

@@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"io"
"net/http"
"os"
"strings"
"time"
@@ -17,11 +18,13 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/httplib"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
gitea_context "code.gitea.io/gitea/services/context"
)
// ArchiveRequest defines the parameters of an archive request, which notably
@@ -138,6 +141,25 @@ func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver
}
}
// Stream satisfies the ArchiveRequest being passed in. Processing
// will occur directly in this routine.
func (aReq *ArchiveRequest) Stream(ctx context.Context, gitRepo *git.Repository, w io.Writer) error {
if aReq.Type == git.ArchiveBundle {
return gitRepo.CreateBundle(
ctx,
aReq.CommitID,
w,
)
}
return gitRepo.CreateArchive(
ctx,
aReq.Type,
w,
setting.Repository.PrefixArchiveFiles,
aReq.CommitID,
)
}
// doArchive satisfies the ArchiveRequest being passed in. Processing
// will occur in a separate goroutine, as this phase may take a while to
// complete. If the archive already exists, doArchive will not do
@@ -204,31 +226,17 @@ func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver
}
defer gitRepo.Close()
go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) {
go func(done chan error, w *io.PipeWriter, archiveReq *ArchiveRequest, gitRepo *git.Repository) {
defer func() {
if r := recover(); r != nil {
done <- fmt.Errorf("%v", r)
}
}()
if archiver.Type == git.ArchiveBundle {
err = gitRepo.CreateBundle(
ctx,
archiver.CommitID,
w,
)
} else {
err = gitRepo.CreateArchive(
ctx,
archiver.Type,
w,
setting.Repository.PrefixArchiveFiles,
archiver.CommitID,
)
}
err := archiveReq.Stream(ctx, gitRepo, w)
_ = w.CloseWithError(err)
done <- err
}(done, w, archiver, gitRepo)
}(done, w, r, gitRepo)
// TODO: add lfs data to zip
// TODO: add submodule data to zip
@@ -338,3 +346,54 @@ func DeleteRepositoryArchives(ctx context.Context) error {
}
return storage.Clean(storage.RepoArchives)
}
func ServeRepoArchive(ctx *gitea_context.Base, repo *repo_model.Repository, gitRepo *git.Repository, archiveReq *ArchiveRequest) {
// Add nix format link header so tarballs lock correctly:
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
repo.APIURL(),
archiveReq.CommitID,
archiveReq.Type.String(),
archiveReq.CommitID,
))
downloadName := repo.Name + "-" + archiveReq.GetArchiveName()
if setting.Repository.StreamArchives {
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{Filename: downloadName})
if err := archiveReq.Stream(ctx, gitRepo, ctx.Resp); err != nil && !ctx.Written() {
log.Error("Archive %v streaming failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
}
return
}
archiver, err := archiveReq.Await(ctx)
if err != nil {
log.Error("Archive %v await failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
return
}
rPath := archiver.RelativePath()
if setting.RepoArchive.Storage.ServeDirect() {
// If we have a signed url (S3, object storage), redirect to this directly.
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
if u != nil && err == nil {
ctx.Redirect(u.String())
return
}
}
fr, err := storage.RepoArchives.Open(rPath)
if err != nil {
log.Error("Archive %v open file failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
return
}
defer fr.Close()
ctx.ServeContent(fr, &gitea_context.ServeHeaderOptions{
Filename: downloadName,
LastModified: archiver.CreatedUnix.AsLocalTime(),
})
}