FIX: URL sanitization to handle schemeless credentials (#37440)

Fixes #37435

---------

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: Giteabot <teabot@gitea.io>
This commit is contained in:
Nicolas
2026-04-28 17:26:08 +02:00
committed by GitHub
parent 8bf51da65f
commit fedc9dc993
5 changed files with 139 additions and 49 deletions

View File

@@ -57,14 +57,12 @@ type Command struct {
}
func logArgSanitize(arg string) string {
if strings.Contains(arg, "://") && strings.Contains(arg, "@") {
return util.SanitizeCredentialURLs(arg)
} else if filepath.IsAbs(arg) {
if filepath.IsAbs(arg) {
base := filepath.Base(arg)
dir := filepath.Dir(arg)
return ".../" + filepath.Join(filepath.Base(dir), base)
}
return arg
return util.SanitizeCredentialURLs(arg)
}
func (c *Command) LogString() string {

View File

@@ -109,7 +109,10 @@ func TestCommandString(t *testing.T) {
assert.Equal(t, cmd.prog+` a "-m msg" "it's a test" "say \"hello\""`, cmd.LogString())
cmd = NewCommand("url: https://a:b@c/", "/root/dir-a/dir-b")
assert.Equal(t, cmd.prog+` "url: https://sanitized-credential@c/" .../dir-a/dir-b`, cmd.LogString())
assert.Equal(t, cmd.prog+` "url: https://(masked)@c/" .../dir-a/dir-b`, cmd.LogString())
cmd = NewCommand("url: a:b@c/", "/root/dir-a/dir-b")
assert.Equal(t, cmd.prog+` "url: (masked)@c/" .../dir-a/dir-b`, cmd.LogString())
}
func TestRunStdError(t *testing.T) {

View File

@@ -5,7 +5,8 @@ package util
import (
"bytes"
"unicode"
"net"
"strings"
)
type sanitizedError struct {
@@ -25,48 +26,103 @@ func SanitizeErrorCredentialURLs(err error) error {
return sanitizedError{err: err}
}
const userPlaceholder = "sanitized-credential"
var schemeSep = []byte("://")
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
const userInfoPlaceholder = "(masked)"
// SanitizeCredentialURLs remove all credentials in URLs for the input string:
// * "https://userinfo@domain.com" => "https://***@domain.com"
// * "user:pass@domain.com" => "***@domain.com"
// "***" is a magic string internally used, doesn't guarantee to be anything.
func SanitizeCredentialURLs(s string) string {
sepColPos := strings.Index(s, ":")
if sepColPos == -1 {
return s // fast path: no colon, unlikely contain any URL credential
}
sepAtPos := strings.Index(s[sepColPos+1:], "@")
for sepAtPos == -1 {
return s // fast path: no "@" after colon, unlikely contain any URL credential
}
sepAtPos += sepColPos + 1
res := make([]byte, 0, len(s)+len(userInfoPlaceholder)) // a best guess to avoid too many re-allocations
bs := UnsafeStringToBytes(s)
schemeSepPos := bytes.Index(bs, schemeSep)
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
return s // fast return if there is no URL scheme or no userinfo
}
out := make([]byte, 0, len(bs)+len(userPlaceholder))
for schemeSepPos != -1 {
schemeSepPos += 3 // skip the "://"
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
sepLoop:
for ; sepEndPos < len(bs); sepEndPos++ {
c := bs[sepEndPos]
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
continue
}
for {
// left part (before "@") is likely to be the "userinfo" (single username, or "username:password")
leftPos := sepAtPos - 1
leftLoop:
for leftPos >= 0 {
c := bs[leftPos]
switch c {
case '@':
sepAtPos = sepEndPos
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
// RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
default:
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
if !valid {
break leftLoop
}
}
leftPos--
}
// if there is '@', and the string is like "s://u@h", then hide the "u" part
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
out = append(out, bs[:schemeSepPos]...)
out = append(out, userPlaceholder...)
out = append(out, bs[sepAtPos:sepEndPos]...)
// left pos should point to the beginning of the left part, this pos is always valid in the buffer
leftPos++
// right part is likely to be the host (domain name, ip address)
rightPos := sepAtPos + 1
rightLoop:
for rightPos < len(bs) {
c := bs[rightPos]
switch c {
case '.', '-':
// valid host char
case '[':
// ipv6 begin
if rightPos != sepAtPos+1 {
break rightLoop
}
case ']':
// ipv6 end
rightPos++
break rightLoop
default:
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
if bs[sepAtPos+1] == '[' {
// ipv6 host
valid = 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' || '0' <= c && c <= '9' || c == ':'
}
if !valid {
break rightLoop
}
}
rightPos++
}
leading, leftPart, rightPart := bs[:leftPos], bs[leftPos:sepAtPos], bs[sepAtPos+1:rightPos]
// Either:
// * git log message: "user:pass@host" (it contains a colon in userinfo), ignore "git@host" pattern
// * http like URL: "https://userinfo@host.com" (it has "://" before the userinfo)
needSanitize := bytes.IndexByte(leftPart, ':') >= 0 || bytes.HasSuffix(leading, schemeSep)
needSanitize = needSanitize && len(leftPart) > 0 && len(rightPart) > 0
// TODO: can also do more checks for right part
// for example: ipv6 quick check
if needSanitize && rightPart[0] == '[' {
needSanitize = rightPart[len(rightPart)-1] == ']' && net.ParseIP(UnsafeBytesToString(rightPart[1:len(rightPart)-1])) != nil
}
if needSanitize {
res = append(res, leading...)
res = append(res, userInfoPlaceholder...)
res = append(res, '@')
res = append(res, rightPart...)
} else {
out = append(out, bs[:sepEndPos]...)
res = append(res, bs[:rightPos]...)
}
bs = bs[rightPos:]
sepAtPos = bytes.IndexByte(bs, '@')
if sepAtPos == -1 {
break
}
bs = bs[sepEndPos:]
schemeSepPos = bytes.Index(bs, schemeSep)
}
out = append(out, bs...)
return UnsafeBytesToString(out)
res = append(res, bs...)
return UnsafeBytesToString(res)
}

View File

@@ -13,7 +13,7 @@ import (
func TestSanitizeErrorCredentialURLs(t *testing.T) {
err := errors.New("error with https://a@b.com")
se := SanitizeErrorCredentialURLs(err)
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
assert.Equal(t, "error with https://"+userInfoPlaceholder+"@b.com", se.Error())
}
func TestSanitizeCredentialURLs(t *testing.T) {
@@ -27,15 +27,35 @@ func TestSanitizeCredentialURLs(t *testing.T) {
},
{
"https://mytoken@github.com/go-gitea/test_repo.git",
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
},
{
"https://user:password@github.com/go-gitea/test_repo.git",
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
},
{
"https://user:password@[::]/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@[::]/go-gitea/test_repo.git",
},
{
"https://user:password@[2001:db8::1]:8080/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@[2001:db8::1]:8080/go-gitea/test_repo.git",
},
{
"see https://u:p@[::1]/x and https://u2:p2@h2",
"see https://" + userInfoPlaceholder + "@[::1]/x and https://" + userInfoPlaceholder + "@h2",
},
{
"https://user:secret@[unclosed-ipv6",
"https://user:secret@[unclosed-ipv6",
},
{
"https://user:secret@[invalid-ipv6]",
"https://user:secret@[invalid-ipv6]",
},
{
"ftp://x@",
"ftp://" + userPlaceholder + "@",
"ftp://x@",
},
{
"ftp://x/@",
@@ -43,27 +63,40 @@ func TestSanitizeCredentialURLs(t *testing.T) {
},
{
"ftp://u@x/@", // test multiple @ chars
"ftp://" + userPlaceholder + "@x/@",
"ftp://" + userInfoPlaceholder + "@x/@",
},
{
"😊ftp://u@x😊", // test unicode
"😊ftp://" + userPlaceholder + "@x😊",
"😊ftp://" + userInfoPlaceholder + "@x😊",
},
{
"://@",
"://@",
},
{
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
"//u:p@h",
"//" + userInfoPlaceholder + "@h",
},
{
"s://u@h", // the minimal pattern to be sanitized
"s://" + userPlaceholder + "@h",
"s://u@h",
"s://" + userInfoPlaceholder + "@h",
},
{
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
"URLs in log https://" + userInfoPlaceholder + "@h and https://" + userInfoPlaceholder + "@h:80/, with https://h.com and u@h.com",
},
{
"fatal: unable to look up username:token@github.com (port 9418)",
"fatal: unable to look up " + userInfoPlaceholder + "@github.com (port 9418)",
},
{
"git failed for user:token@github.com/go-gitea/test_repo.git",
"git failed for " + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
},
{
// SSH-form git URL ("git@host:path") must not let a later credential URL through
"failed remote git@github.com:foo, retried via https://user:tok@github.com/foo",
"failed remote git@github.com:foo, retried via https://" + userInfoPlaceholder + "@github.com/foo",
},
}

View File

@@ -218,7 +218,7 @@ func migrateRepository(ctx context.Context, doer *user_model.User, downloader ba
// We don't actually need to check the OriginalURL as it isn't used anywhere
}
log.Trace("migrating git data from %s", repo.CloneURL)
log.Trace("migrating git data from %s", util.SanitizeCredentialURLs(repo.CloneURL))
messenger("repo.migrate.migrating_git")
if err = uploader.CreateRepo(ctx, repo, opts); err != nil {
return err