mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-04 08:41:09 +00:00
FIX: URL sanitization to handle schemeless credentials (#37440)
Fixes #37435 --------- Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Giteabot <teabot@gitea.io>
This commit is contained in:
@@ -57,14 +57,12 @@ type Command struct {
|
||||
}
|
||||
|
||||
func logArgSanitize(arg string) string {
|
||||
if strings.Contains(arg, "://") && strings.Contains(arg, "@") {
|
||||
return util.SanitizeCredentialURLs(arg)
|
||||
} else if filepath.IsAbs(arg) {
|
||||
if filepath.IsAbs(arg) {
|
||||
base := filepath.Base(arg)
|
||||
dir := filepath.Dir(arg)
|
||||
return ".../" + filepath.Join(filepath.Base(dir), base)
|
||||
}
|
||||
return arg
|
||||
return util.SanitizeCredentialURLs(arg)
|
||||
}
|
||||
|
||||
func (c *Command) LogString() string {
|
||||
|
||||
@@ -109,7 +109,10 @@ func TestCommandString(t *testing.T) {
|
||||
assert.Equal(t, cmd.prog+` a "-m msg" "it's a test" "say \"hello\""`, cmd.LogString())
|
||||
|
||||
cmd = NewCommand("url: https://a:b@c/", "/root/dir-a/dir-b")
|
||||
assert.Equal(t, cmd.prog+` "url: https://sanitized-credential@c/" .../dir-a/dir-b`, cmd.LogString())
|
||||
assert.Equal(t, cmd.prog+` "url: https://(masked)@c/" .../dir-a/dir-b`, cmd.LogString())
|
||||
|
||||
cmd = NewCommand("url: a:b@c/", "/root/dir-a/dir-b")
|
||||
assert.Equal(t, cmd.prog+` "url: (masked)@c/" .../dir-a/dir-b`, cmd.LogString())
|
||||
}
|
||||
|
||||
func TestRunStdError(t *testing.T) {
|
||||
|
||||
@@ -5,7 +5,8 @@ package util
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type sanitizedError struct {
|
||||
@@ -25,48 +26,103 @@ func SanitizeErrorCredentialURLs(err error) error {
|
||||
return sanitizedError{err: err}
|
||||
}
|
||||
|
||||
const userPlaceholder = "sanitized-credential"
|
||||
|
||||
var schemeSep = []byte("://")
|
||||
|
||||
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
|
||||
const userInfoPlaceholder = "(masked)"
|
||||
|
||||
// SanitizeCredentialURLs remove all credentials in URLs for the input string:
|
||||
// * "https://userinfo@domain.com" => "https://***@domain.com"
|
||||
// * "user:pass@domain.com" => "***@domain.com"
|
||||
// "***" is a magic string internally used, doesn't guarantee to be anything.
|
||||
func SanitizeCredentialURLs(s string) string {
|
||||
sepColPos := strings.Index(s, ":")
|
||||
if sepColPos == -1 {
|
||||
return s // fast path: no colon, unlikely contain any URL credential
|
||||
}
|
||||
sepAtPos := strings.Index(s[sepColPos+1:], "@")
|
||||
for sepAtPos == -1 {
|
||||
return s // fast path: no "@" after colon, unlikely contain any URL credential
|
||||
}
|
||||
sepAtPos += sepColPos + 1
|
||||
|
||||
res := make([]byte, 0, len(s)+len(userInfoPlaceholder)) // a best guess to avoid too many re-allocations
|
||||
bs := UnsafeStringToBytes(s)
|
||||
schemeSepPos := bytes.Index(bs, schemeSep)
|
||||
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
|
||||
return s // fast return if there is no URL scheme or no userinfo
|
||||
}
|
||||
out := make([]byte, 0, len(bs)+len(userPlaceholder))
|
||||
for schemeSepPos != -1 {
|
||||
schemeSepPos += 3 // skip the "://"
|
||||
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
|
||||
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
|
||||
sepLoop:
|
||||
for ; sepEndPos < len(bs); sepEndPos++ {
|
||||
c := bs[sepEndPos]
|
||||
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
|
||||
continue
|
||||
}
|
||||
for {
|
||||
// left part (before "@") is likely to be the "userinfo" (single username, or "username:password")
|
||||
leftPos := sepAtPos - 1
|
||||
leftLoop:
|
||||
for leftPos >= 0 {
|
||||
c := bs[leftPos]
|
||||
switch c {
|
||||
case '@':
|
||||
sepAtPos = sepEndPos
|
||||
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
|
||||
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
|
||||
// RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
|
||||
default:
|
||||
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
|
||||
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
|
||||
if !valid {
|
||||
break leftLoop
|
||||
}
|
||||
}
|
||||
leftPos--
|
||||
}
|
||||
// if there is '@', and the string is like "s://u@h", then hide the "u" part
|
||||
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
|
||||
out = append(out, bs[:schemeSepPos]...)
|
||||
out = append(out, userPlaceholder...)
|
||||
out = append(out, bs[sepAtPos:sepEndPos]...)
|
||||
// left pos should point to the beginning of the left part, this pos is always valid in the buffer
|
||||
leftPos++
|
||||
|
||||
// right part is likely to be the host (domain name, ip address)
|
||||
rightPos := sepAtPos + 1
|
||||
rightLoop:
|
||||
for rightPos < len(bs) {
|
||||
c := bs[rightPos]
|
||||
switch c {
|
||||
case '.', '-':
|
||||
// valid host char
|
||||
case '[':
|
||||
// ipv6 begin
|
||||
if rightPos != sepAtPos+1 {
|
||||
break rightLoop
|
||||
}
|
||||
case ']':
|
||||
// ipv6 end
|
||||
rightPos++
|
||||
break rightLoop
|
||||
default:
|
||||
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
|
||||
if bs[sepAtPos+1] == '[' {
|
||||
// ipv6 host
|
||||
valid = 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' || '0' <= c && c <= '9' || c == ':'
|
||||
}
|
||||
if !valid {
|
||||
break rightLoop
|
||||
}
|
||||
}
|
||||
rightPos++
|
||||
}
|
||||
|
||||
leading, leftPart, rightPart := bs[:leftPos], bs[leftPos:sepAtPos], bs[sepAtPos+1:rightPos]
|
||||
|
||||
// Either:
|
||||
// * git log message: "user:pass@host" (it contains a colon in userinfo), ignore "git@host" pattern
|
||||
// * http like URL: "https://userinfo@host.com" (it has "://" before the userinfo)
|
||||
needSanitize := bytes.IndexByte(leftPart, ':') >= 0 || bytes.HasSuffix(leading, schemeSep)
|
||||
needSanitize = needSanitize && len(leftPart) > 0 && len(rightPart) > 0
|
||||
// TODO: can also do more checks for right part
|
||||
// for example: ipv6 quick check
|
||||
if needSanitize && rightPart[0] == '[' {
|
||||
needSanitize = rightPart[len(rightPart)-1] == ']' && net.ParseIP(UnsafeBytesToString(rightPart[1:len(rightPart)-1])) != nil
|
||||
}
|
||||
if needSanitize {
|
||||
res = append(res, leading...)
|
||||
res = append(res, userInfoPlaceholder...)
|
||||
res = append(res, '@')
|
||||
res = append(res, rightPart...)
|
||||
} else {
|
||||
out = append(out, bs[:sepEndPos]...)
|
||||
res = append(res, bs[:rightPos]...)
|
||||
}
|
||||
bs = bs[rightPos:]
|
||||
sepAtPos = bytes.IndexByte(bs, '@')
|
||||
if sepAtPos == -1 {
|
||||
break
|
||||
}
|
||||
bs = bs[sepEndPos:]
|
||||
schemeSepPos = bytes.Index(bs, schemeSep)
|
||||
}
|
||||
out = append(out, bs...)
|
||||
return UnsafeBytesToString(out)
|
||||
res = append(res, bs...)
|
||||
return UnsafeBytesToString(res)
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ import (
|
||||
func TestSanitizeErrorCredentialURLs(t *testing.T) {
|
||||
err := errors.New("error with https://a@b.com")
|
||||
se := SanitizeErrorCredentialURLs(err)
|
||||
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
|
||||
assert.Equal(t, "error with https://"+userInfoPlaceholder+"@b.com", se.Error())
|
||||
}
|
||||
|
||||
func TestSanitizeCredentialURLs(t *testing.T) {
|
||||
@@ -27,15 +27,35 @@ func TestSanitizeCredentialURLs(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"https://mytoken@github.com/go-gitea/test_repo.git",
|
||||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
|
||||
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
|
||||
},
|
||||
{
|
||||
"https://user:password@github.com/go-gitea/test_repo.git",
|
||||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
|
||||
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
|
||||
},
|
||||
{
|
||||
"https://user:password@[::]/go-gitea/test_repo.git",
|
||||
"https://" + userInfoPlaceholder + "@[::]/go-gitea/test_repo.git",
|
||||
},
|
||||
{
|
||||
"https://user:password@[2001:db8::1]:8080/go-gitea/test_repo.git",
|
||||
"https://" + userInfoPlaceholder + "@[2001:db8::1]:8080/go-gitea/test_repo.git",
|
||||
},
|
||||
{
|
||||
"see https://u:p@[::1]/x and https://u2:p2@h2",
|
||||
"see https://" + userInfoPlaceholder + "@[::1]/x and https://" + userInfoPlaceholder + "@h2",
|
||||
},
|
||||
{
|
||||
"https://user:secret@[unclosed-ipv6",
|
||||
"https://user:secret@[unclosed-ipv6",
|
||||
},
|
||||
{
|
||||
"https://user:secret@[invalid-ipv6]",
|
||||
"https://user:secret@[invalid-ipv6]",
|
||||
},
|
||||
{
|
||||
"ftp://x@",
|
||||
"ftp://" + userPlaceholder + "@",
|
||||
"ftp://x@",
|
||||
},
|
||||
{
|
||||
"ftp://x/@",
|
||||
@@ -43,27 +63,40 @@ func TestSanitizeCredentialURLs(t *testing.T) {
|
||||
},
|
||||
{
|
||||
"ftp://u@x/@", // test multiple @ chars
|
||||
"ftp://" + userPlaceholder + "@x/@",
|
||||
"ftp://" + userInfoPlaceholder + "@x/@",
|
||||
},
|
||||
{
|
||||
"😊ftp://u@x😊", // test unicode
|
||||
"😊ftp://" + userPlaceholder + "@x😊",
|
||||
"😊ftp://" + userInfoPlaceholder + "@x😊",
|
||||
},
|
||||
{
|
||||
"://@",
|
||||
"://@",
|
||||
},
|
||||
{
|
||||
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
|
||||
"//u:p@h",
|
||||
"//" + userInfoPlaceholder + "@h",
|
||||
},
|
||||
{
|
||||
"s://u@h", // the minimal pattern to be sanitized
|
||||
"s://" + userPlaceholder + "@h",
|
||||
"s://u@h",
|
||||
"s://" + userInfoPlaceholder + "@h",
|
||||
},
|
||||
{
|
||||
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
|
||||
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
|
||||
"URLs in log https://" + userInfoPlaceholder + "@h and https://" + userInfoPlaceholder + "@h:80/, with https://h.com and u@h.com",
|
||||
},
|
||||
{
|
||||
"fatal: unable to look up username:token@github.com (port 9418)",
|
||||
"fatal: unable to look up " + userInfoPlaceholder + "@github.com (port 9418)",
|
||||
},
|
||||
{
|
||||
"git failed for user:token@github.com/go-gitea/test_repo.git",
|
||||
"git failed for " + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
|
||||
},
|
||||
{
|
||||
// SSH-form git URL ("git@host:path") must not let a later credential URL through
|
||||
"failed remote git@github.com:foo, retried via https://user:tok@github.com/foo",
|
||||
"failed remote git@github.com:foo, retried via https://" + userInfoPlaceholder + "@github.com/foo",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -218,7 +218,7 @@ func migrateRepository(ctx context.Context, doer *user_model.User, downloader ba
|
||||
// We don't actually need to check the OriginalURL as it isn't used anywhere
|
||||
}
|
||||
|
||||
log.Trace("migrating git data from %s", repo.CloneURL)
|
||||
log.Trace("migrating git data from %s", util.SanitizeCredentialURLs(repo.CloneURL))
|
||||
messenger("repo.migrate.migrating_git")
|
||||
if err = uploader.CreateRepo(ctx, repo, opts); err != nil {
|
||||
return err
|
||||
|
||||
Reference in New Issue
Block a user