Fix bug when viewing the commit diff page with non-ANSI files (#36149)

Fix #35504

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Lunny Xiao
2025-12-13 05:54:03 -08:00
committed by GitHub
parent ac8308b5cb
commit 29057ea55f
11 changed files with 220 additions and 317 deletions

View File

@@ -835,11 +835,11 @@ parsingLoop:
if buffer.Len() == 0 {
continue
}
charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
if charsetLabel != "UTF-8" && err == nil {
encoding, _ := stdcharset.Lookup(charsetLabel)
if encoding != nil {
diffLineTypeDecoders[lineType] = encoding.NewDecoder()
charsetLabel, _ := charset.DetectEncoding(buffer.Bytes())
if charsetLabel != "UTF-8" {
charsetEncoding, _ := stdcharset.Lookup(charsetLabel)
if charsetEncoding != nil {
diffLineTypeDecoders[lineType] = charsetEncoding.NewDecoder()
}
}
}
@@ -1325,10 +1325,10 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
if shouldFullFileHighlight {
if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String())
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
}
if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String())
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.Bytes())
}
}
}
@@ -1336,9 +1336,34 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
return diff, nil
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML {
func splitHighlightLines(buf []byte) (ret [][]byte) {
lineCount := bytes.Count(buf, []byte("\n")) + 1
ret = make([][]byte, 0, lineCount)
nlTagClose := []byte("\n</")
for {
pos := bytes.IndexByte(buf, '\n')
if pos == -1 {
ret = append(ret, buf)
return ret
}
// Chroma highlighting output sometimes have "</span>" right after \n, sometimes before.
// * "<span>text\n</span>"
// * "<span>text</span>\n"
if bytes.HasPrefix(buf[pos:], nlTagClose) {
pos1 := bytes.IndexByte(buf[pos:], '>')
if pos1 != -1 {
pos += pos1
}
}
ret = append(ret, buf[:pos+1])
buf = buf[pos+1:]
}
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML {
content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{}))
highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
splitLines := strings.Split(string(highlightedNewContent), "\n")
splitLines := splitHighlightLines([]byte(highlightedNewContent))
lines := make(map[int]template.HTML, len(splitLines))
// only save the highlighted lines we need, but not the whole file, to save memory
for _, sec := range diffFile.Sections {

View File

@@ -5,6 +5,7 @@
package gitdiff
import (
"html/template"
"strconv"
"strings"
"testing"
@@ -1106,3 +1107,41 @@ func TestDiffLine_GetExpandDirection(t *testing.T) {
assert.Equal(t, c.direction, c.diffLine.GetExpandDirection(), "case %s expected direction: %s", c.name, c.direction)
}
}
func TestHighlightCodeLines(t *testing.T) {
t.Run("CharsetDetecting", func(t *testing.T) {
diffFile := &DiffFile{
Name: "a.c",
Language: "c",
Sections: []*DiffSection{
{
Lines: []*DiffLine{{LeftIdx: 1}},
},
},
}
ret := highlightCodeLines(diffFile, true, []byte("// abc\xcc def\xcd")) // ISO-8859-1 bytes
assert.Equal(t, "<span class=\"c1\">// abcÌ defÍ\n</span>", string(ret[0]))
})
t.Run("LeftLines", func(t *testing.T) {
diffFile := &DiffFile{
Name: "a.c",
Language: "c",
Sections: []*DiffSection{
{
Lines: []*DiffLine{
{LeftIdx: 1},
{LeftIdx: 2},
{LeftIdx: 3},
},
},
},
}
const nl = "\n"
ret := highlightCodeLines(diffFile, true, []byte("a\nb\n"))
assert.Equal(t, map[int]template.HTML{
0: `<span class="n">a</span>` + nl,
1: `<span class="n">b</span>`,
}, ret)
})
}

View File

@@ -25,12 +25,12 @@ func TestDiffWithHighlight(t *testing.T) {
t.Run("CleanUp", func(t *testing.T) {
hcd := newHighlightCodeDiff()
codeA := template.HTML(`<span class="cm>this is a comment</span>`)
codeB := template.HTML(`<span class="cm>this is updated comment</span>`)
codeA := template.HTML(`<span class="cm">this is a comment</span>`)
codeB := template.HTML(`<span class="cm">this is updated comment</span>`)
outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB)
assert.Equal(t, `<span class="cm>this is <span class="removed-code">a</span> comment</span>`, string(outDel))
assert.Equal(t, `<span class="cm">this is <span class="removed-code">a</span> comment</span>`, string(outDel))
outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB)
assert.Equal(t, `<span class="cm>this is <span class="added-code">updated</span> comment</span>`, string(outAdd))
assert.Equal(t, `<span class="cm">this is <span class="added-code">updated</span> comment</span>`, string(outAdd))
})
t.Run("OpenCloseTags", func(t *testing.T) {