prune-junit-xml: simplify failure message

In Go unit tests, the entire test output becomes the failure message because
`go test` doesn't track why a test fails. This can make the failure message
pretty large, in particular in integration tests.

We cannot identify the real failure either because Kubernetes has no convention
for how to format test failures. What we can do is recognize log output added
by klog.

prune-junit-xml now moves the full text to to the test output and only keep
those lines in the failure which are not from klog.

The klog output parsing might eventually get moved to
k8s.io/logtools/logparse. For now it is developed as a sub-package of
prune-junit-xml.
This commit is contained in:
Patrick Ohly 2024-10-08 16:48:16 +02:00
parent c9a61afdb7
commit fba2bcf2d2
5 changed files with 480 additions and 16 deletions

View File

@ -0,0 +1,234 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package logparse provides a parser for the klog text format:
//
// I1007 13:16:55.727802 1146763 example.go:57] "Key/value encoding" logger="example" foo="bar" duration="1s" int=42 float=3.14 string="hello world" quotedString="hello \"world\"" multiLinString=<
// hello
// world
// >
// E1007 15:20:04.343375 1157755 example.go:41] Log using Errorf, err: fail
//
// It also supports the klog/ktesting unit test log output:
//
// === RUN TestKlogr
// example_test.go:45: I1007 13:28:21.908998] hello world
// example_test.go:46: E1007 13:28:21.909034] failed err="failed: some error"
// example_test.go:47: I1007 13:28:21.909042] verbosity 1
// example_test.go:48: I1007 13:28:21.909047] main/helper: with prefix
// example_test.go:50: I1007 13:28:21.909076] key/value pairs int=1 float=2 pair="(1, 2)" raw={"Name":"joe","NS":"kube-system"} slice=[1,2,3,"str"] map={"a":1,"b":2} kobj="kube-system/joe" string="hello world" quotedString="hello \"world\"" multiLineString=<
// hello
// world
// >
// example_test.go:63: I1007 13:28:21.909085] info message level 4
// example_test.go:64: I1007 13:28:21.909089] info message level 5
// --- PASS: TestKlogr (0.00s)
// PASS
// ok k8s.io/klog/v2/ktesting/example (cached)
//
// Arbitrary indention with space or tab is supported. All lines of
// a klog log entry must be indented the same way.
package logparse
import (
"bufio"
"errors"
"fmt"
"io"
"iter"
"regexp"
"strings"
)
// Parse splits log output provided by the reader into individual log entries.
// The original log can be reconstructed verbatim by concatenating these
// entries. If the reader fails with an error, the last log entry will
// capture that error.
func Parse(in io.Reader) []Entry {
var log []Entry
for entry := range All(in) {
log = append(log, entry)
}
return log
}
// All is like Parse except that it can be used in a for/range loop:
//
// for entry := range logparse.All(reader) {
// // entry is the next log entry.
// }
func All(in io.Reader) iter.Seq[Entry] {
return func(yield func(Entry) bool) {
parse(in, yield)
}
}
// Entry is the base type for a log entry.
//
// Use type assertions to check for additional information. All
// of the instances behind this interface are pointers.
type Entry interface {
// LogData returns a verbatim copy of the original log chunk,
// including one or more line breaks. Concatenating these chunks
// from all entries will reconstruct the parsed log.
LogData() string
}
// ErrorEntry captures the error encountered while reading the log input.
type ErrorEntry struct {
Err error
}
var _ Entry = &ErrorEntry{}
var _ fmt.Stringer = &ErrorEntry{}
func (e *ErrorEntry) LogData() string { return "" }
func (e *ErrorEntry) String() string { return e.Err.Error() }
// OtherEntry captures some log line which is not part of a klog log entry.
type OtherEntry struct {
Data string
}
var _ Entry = &OtherEntry{}
var _ fmt.Stringer = &OtherEntry{}
func (e *OtherEntry) LogData() string { return e.Data }
func (e *OtherEntry) String() string { return e.Data }
// LogEntry captures some log entry which was recognized as coming from klog.
type KlogEntry struct {
Data string
Severity Severity
}
type Severity byte
const (
SeverityInfo = Severity('I')
SeverityWarning = Severity('W')
SeverityError = Severity('E')
SeverityFatal = Severity('F')
)
var _ Entry = &KlogEntry{}
var _ fmt.Stringer = &KlogEntry{}
func (e *KlogEntry) LogData() string { return e.Data }
func (e *KlogEntry) String() string { return e.Data }
func parse(in io.Reader, yield func(Entry) bool) {
// Read lines using arbitrary length, which can't be done with a
// bufio.Scanner.
reader := bufio.NewReader(in)
line, err := reader.ReadString('\n')
for {
// First deliver the current line. This may need to look
// ahead and thus returns the next line.
var nextLine string
var nextErr error
if len(line) > 0 {
var cont bool
nextLine, cont, nextErr = parseLine(reader, line, yield)
if !cont {
return
}
} else {
nextLine, nextErr = reader.ReadString('\n')
}
// Finalize parsing?
switch {
case err == nil:
// Okay.
case errors.Is(err, io.EOF):
return
default:
yield(&ErrorEntry{Err: err})
return
}
line = nextLine
err = nextErr
}
}
const (
pid = `(?<pid>[[:digit:]]+)`
source = `(?<source>[^:]+:[[:digit:]]+)`
severity = `(?<severity>[IWEF])`
datetime = `(?<month>[[:digit:]]{2})(?<day>[[:digit:]]{2}) (?<hour>[[:digit:]]{2}):(?<minutes>[[:digit:]]{2}):(?<seconds>[[:digit:]]{2})\.(?<microseconds>[[:digit:]]{6})`
)
var (
klogPrefix = regexp.MustCompile(`^(?<indention>[[:blank:]]*)` +
`(?:` + source + `: )?` + // `go test` source code
severity +
datetime +
`(?: +` + pid + ` ` + source + `)?` + // klog pid + source code
`\] `)
indentionIndex = lookupSubmatch("indention")
severityIndex = lookupSubmatch("severity")
)
func lookupSubmatch(name string) int {
names := klogPrefix.SubexpNames()
for i, n := range names {
if n == name {
return i
}
}
panic(fmt.Errorf("named submatch %q not found in %q", name, klogPrefix.String()))
}
// parseLine deals with one non-empty line. It returns the result of yield and
// potentially the next line and/or a read error. If it doesn't have any new
// data to process, it returns the empty string and a nil error.
func parseLine(reader *bufio.Reader, line string, yield func(Entry) bool) (string, bool, error) {
match := klogPrefix.FindStringSubmatchIndex(line)
if match == nil {
cont := yield(&OtherEntry{Data: line})
return "", cont, nil
}
e := &KlogEntry{
Data: line,
Severity: Severity(line[match[2*severityIndex]]),
// TODO (?): store more of the fields that have been identified
}
// Deal with potential line continuation of multi-line string value,
// if necessary.
if !strings.HasSuffix(line, "=<\n") {
return "", yield(e), nil
}
indention := line[match[2*indentionIndex]:match[2*indentionIndex+1]]
for {
var err error
line, err = reader.ReadString('\n')
if !strings.HasPrefix(line, indention) ||
!strings.HasPrefix(line[len(indention):], "\t") && !strings.HasPrefix(line[len(indention):], " >") {
// Some other line (wrong indention or wrong continuation).
// Yield what we have so far and the go back to processing that new line.
cont := yield(e)
return line, cont, err
}
e.Data += line
if err != nil {
return "", yield(e), err
}
}
}

View File

@ -0,0 +1,187 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package logparse
import (
"errors"
"fmt"
"io"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
func TestParse(t *testing.T) {
t.Logf("Full regular expression:\n%s", klogPrefix.String())
fakeErr := errors.New("fake error")
testcases := map[string]struct {
log string
err error
expectEntries []Entry
}{
"empty": {},
"one-other": {
log: "other",
expectEntries: []Entry{
&OtherEntry{Data: "other"},
},
},
"one-klog": {
log: `I1007 13:16:55.727802 1146763 example.go:57] "Key/value encoding" logger="example"`,
expectEntries: []Entry{
&KlogEntry{
Data: `I1007 13:16:55.727802 1146763 example.go:57] "Key/value encoding" logger="example"`,
Severity: SeverityInfo,
},
},
},
"one-unit": {
log: `example_test.go:45: E1007 13:28:21.908998] hello world`,
expectEntries: []Entry{
&KlogEntry{
Data: `example_test.go:45: E1007 13:28:21.908998] hello world`,
Severity: SeverityError,
},
},
},
"mixture": {
log: `other
I1007 13:16:55.727802 1146763 example.go:57] "a"
E1007 13:16:55.727802 1146763 example.go:58] "b" foo=<
indented
>
middle
example_test.go:45: E1007 13:28:21.908998] hello world
`,
expectEntries: []Entry{
&OtherEntry{Data: "other\n"},
&KlogEntry{
Data: `I1007 13:16:55.727802 1146763 example.go:57] "a"
`,
Severity: SeverityInfo,
},
&KlogEntry{
Data: ` E1007 13:16:55.727802 1146763 example.go:58] "b" foo=<
indented
>
`, Severity: SeverityError,
},
&OtherEntry{Data: "middle\n"},
&KlogEntry{
Data: `example_test.go:45: E1007 13:28:21.908998] hello world
`,
Severity: SeverityError,
},
},
},
"truncated": {
log: `other
E1007 13:16:55.727802 1146763 example.go:58] "b" foo=<
indented
middle
`,
expectEntries: []Entry{
&OtherEntry{Data: "other\n"},
&KlogEntry{
Data: ` E1007 13:16:55.727802 1146763 example.go:58] "b" foo=<
indented
`, Severity: SeverityError,
},
&OtherEntry{Data: "middle\n"},
},
},
"error": {
log: "hello\nworld",
err: fakeErr,
expectEntries: []Entry{
&OtherEntry{Data: "hello\n"},
&OtherEntry{Data: "world"},
&ErrorEntry{Err: fakeErr},
},
},
"truncated-error": {
log: `other
E1007 13:16:55.727802 1146763 example.go:58] "b" foo=<
indented`,
err: fakeErr,
expectEntries: []Entry{
&OtherEntry{Data: "other\n"},
&KlogEntry{
Data: ` E1007 13:16:55.727802 1146763 example.go:58] "b" foo=<
indented`, Severity: SeverityError,
},
&ErrorEntry{Err: fakeErr},
},
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
actualEntries := Parse(&fakeReader{log: tc.log, err: tc.err})
require.Equal(t, tc.expectEntries, actualEntries)
var buf strings.Builder
for _, entry := range actualEntries {
_, _ = buf.WriteString(entry.LogData())
}
require.Equal(t, tc.log, buf.String())
for i := 0; i < len(tc.expectEntries); i++ {
t.Run(fmt.Sprintf("stop-after-%d", i), func(t *testing.T) {
var actualEntries []Entry
seq := All(&fakeReader{log: tc.log, err: tc.err})
e := 0
seq(func(entry Entry) bool {
actualEntries = append(actualEntries, entry)
if e >= i {
return false
}
e++
return true
})
require.Equal(t, tc.expectEntries[0:i+1], actualEntries)
})
}
})
}
}
type fakeReader struct {
log string
err error
offset int
}
func (f *fakeReader) Read(buf []byte) (int, error) {
n := min(len(buf), len(f.log)-f.offset)
copy(buf, []byte(f.log[f.offset:f.offset+n]))
f.offset += n
var err error
if f.offset >= len(f.log) {
if f.err == nil {
err = io.EOF
} else {
err = f.err
}
}
return n, err
}

View File

@ -23,7 +23,9 @@ import (
"io"
"os"
"regexp"
"strings"
"k8s.io/kubernetes/cmd/prune-junit-xml/logparse"
"k8s.io/kubernetes/third_party/forked/gotestsum/junitxml"
)
@ -64,27 +66,59 @@ func main() {
func pruneXML(suites *junitxml.JUnitTestSuites, maxBytes int) {
for _, suite := range suites.Suites {
for _, testcase := range suite.TestCases {
for i := range suite.TestCases {
// Modify directly in the TestCases slice, if necessary.
testcase := &suite.TestCases[i]
if testcase.SkipMessage != nil {
if len(testcase.SkipMessage.Message) > maxBytes {
fmt.Printf("clipping skip message in test case : %s\n", testcase.Name)
head := testcase.SkipMessage.Message[:maxBytes/2]
tail := testcase.SkipMessage.Message[len(testcase.SkipMessage.Message)-maxBytes/2:]
testcase.SkipMessage.Message = head + "[...clipped...]" + tail
}
pruneStringIfNeeded(&testcase.SkipMessage.Message, maxBytes, "clipping skip message in test case : %s\n", testcase.Name)
}
if testcase.Failure != nil {
if len(testcase.Failure.Contents) > maxBytes {
fmt.Printf("clipping failure message in test case : %s\n", testcase.Name)
head := testcase.Failure.Contents[:maxBytes/2]
tail := testcase.Failure.Contents[len(testcase.Failure.Contents)-maxBytes/2:]
testcase.Failure.Contents = head + "[...clipped...]" + tail
// In Go unit tests, the entire test output
// becomes the failure message because `go
// test` doesn't track why a test fails. This
// can make the failure message pretty large.
//
// We cannot identify the real failure here
// either because Kubernetes has no convention
// for how to format test failures. What we can
// do is recognize log output added by klog.
//
// Therefore here we move the full text to
// to the test output and only keep those
// lines in the failure which are not from
// klog.
if testcase.SystemOut == "" {
var buf strings.Builder
// Iterate over all the log entries and decide what to keep as failure message.
for entry := range logparse.All(strings.NewReader(testcase.Failure.Contents)) {
if _, ok := entry.(*logparse.KlogEntry); ok {
continue
}
_, _ = buf.WriteString(entry.LogData())
}
if buf.Len() < len(testcase.Failure.Contents) {
// Update both strings because they became different.
testcase.SystemOut = testcase.Failure.Contents
pruneStringIfNeeded(&testcase.SystemOut, maxBytes, "clipping log output in test case: %s\n", testcase.Name)
testcase.Failure.Contents = buf.String()
}
}
pruneStringIfNeeded(&testcase.Failure.Contents, maxBytes, "clipping failure message in test case : %s\n", testcase.Name)
}
}
}
}
func pruneStringIfNeeded(str *string, maxBytes int, msg string, args ...any) {
if len(*str) <= maxBytes {
return
}
fmt.Printf(msg, args...)
head := (*str)[:maxBytes/2]
tail := (*str)[len(*str)-maxBytes/2:]
*str = head + "[...clipped...]" + tail
}
// This function condenses the junit xml to have package name as top level identifier
// and nesting under that.
func pruneTESTS(suites *junitxml.JUnitTestSuites) {

View File

@ -28,7 +28,7 @@ import (
func TestPruneXML(t *testing.T) {
sourceXML := `<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite tests="3" failures="1" time="271.610000" name="k8s.io/kubernetes/test/integration/apiserver" timestamp="">
<testsuite tests="4" failures="2" time="271.610000" name="k8s.io/kubernetes/test/integration/apiserver" timestamp="">
<properties>
<property name="go.version" value="go1.18 linux/amd64"></property>
</properties>
@ -39,12 +39,15 @@ func TestPruneXML(t *testing.T) {
<testcase classname="k8s.io/kubernetes/test/integration/apimachinery" name="TestSchedulerInformers" time="-0.000000">
<failure message="Failed" type="">&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/internal/transport/transport.go:169 +0x147&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc/internal/transport.(*transportReader).Read(0xc0e5f8edb0, {0xc0efe16f88?, 0xc1169d3a88?, 0x1804787?})&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/internal/transport/transport.go:483 +0x32&#xA;io.ReadAtLeast({0x55c5720, 0xc0e5f8edb0}, {0xc0efe16f88, 0x5, 0x5}, 0x5)&#xA;&#x9;/usr/local/go/src/io/io.go:331 +0x9a&#xA;io.ReadFull(...)&#xA;&#x9;/usr/local/go/src/io/io.go:350&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc/internal/transport.(*Stream).Read(0xc0f3cd67e0, {0xc0efe16f88, 0x5, 0x5})&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/internal/transport/transport.go:467 +0xa5&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.(*parser).recvMsg(0xc0efe16f78, 0x7fffffff)&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/rpc_util.go:559 +0x47&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.recvAndDecompress(0xc1169d3c58?, 0xc0f3cd67e0, {0x0, 0x0}, 0x7fffffff, 0x0, {0x0, 0x0})&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/rpc_util.go:690 +0x66&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.recv(0x172b28f?, {0x7f837c291d58, 0x7f84350}, 0x6f5a274d6e8f284c?, {0x0?, 0x0?}, {0x4be7d40, 0xc0f8c01d50}, 0x0?, 0x0, ...)&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/rpc_util.go:758 +0x6e&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.(*csAttempt).recvMsg(0xc0eb72d800, {0x4be7d40?, 0xc0f8c01d50}, 0x2?)&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/stream.go:970 +0x2b0&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.(*clientStream).RecvMsg.func1(0x4be7d40?)&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/stream.go:821 +0x25&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.(*clientStream).withRetry(0xc0f3cd65a0, 0xc1169d3e78, 0xc1169d3e48)&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/stream.go:675 +0x2f6&#xA;k8s.io/kubernetes/vendor/google.golang.org/grpc.(*clientStream).RecvMsg(0xc0f3cd65a0, {0x4be7d40?, 0xc0f8c01d50?})&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/google.golang.org/grpc/stream.go:820 +0x11f&#xA;k8s.io/kubernetes/vendor/github.com/grpc-ecosystem/go-grpc-prometheus.(*monitoredClientStream).RecvMsg(0xc0efe16f90, {0x4be7d40?, 0xc0f8c01d50?})&#xA;&#x9;/home/prow/go/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/vendor/github.com/grpc-ecosystem/go-grpc-prometheus/client_metrics.go:160</failure>
</testcase>
<testcase classname="k8s.io/kubernetes/test/integration/apimachinery" name="TestLog" time="-0.000000">
<failure message="Failed" type="">logparse_test.go:29: I1007 13:28:21.909085] klog output&#xA;FAIL: fake failure&#xA;</failure>
</testcase>
</testsuite>
</testsuites>`
outputXML := `<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite tests="3" failures="1" time="271.610000" name="k8s.io/kubernetes/test/integration/apiserver" timestamp="">
<testsuite tests="4" failures="2" time="271.610000" name="k8s.io/kubernetes/test/integration/apiserver" timestamp="">
<properties>
<property name="go.version" value="go1.18 linux/amd64"></property>
</properties>
@ -55,6 +58,10 @@ func TestPruneXML(t *testing.T) {
<testcase classname="k8s.io/kubernetes/test/integration/apimachinery" name="TestSchedulerInformers" time="-0.000000">
<failure message="Failed" type="">&#xA;&#x9;/home/prow/go/[...clipped...]t_metrics.go:160</failure>
</testcase>
<testcase classname="k8s.io/kubernetes/test/integration/apimachinery" name="TestLog" time="-0.000000">
<failure message="Failed" type="">FAIL: fake failure&#xA;</failure>
<system-out>logparse_test.go[...clipped...]L: fake failure&#xA;</system-out>
</testcase>
</testsuite>
</testsuites>`
suites, _ := fetchXML(strings.NewReader(sourceXML))

View File

@ -8,7 +8,7 @@ import (
// JUnitTestSuites is a collection of JUnit test suites.
type JUnitTestSuites struct {
XMLName xml.Name `xml:"testsuites"`
XMLName xml.Name `xml:"testsuites"`
Suites []JUnitTestSuite `xml:"testsuite,omitempty"`
}
@ -22,7 +22,7 @@ type JUnitTestSuite struct {
Name string `xml:"name,attr"`
Properties []JUnitProperty `xml:"properties>property,omitempty"`
TestCases []JUnitTestCase `xml:"testcase,omitempty"`
Timestamp string `xml:"timestamp,attr"`
Timestamp string `xml:"timestamp,attr"`
}
// JUnitTestCase is a single test case with its result.
@ -33,6 +33,8 @@ type JUnitTestCase struct {
Time string `xml:"time,attr"`
SkipMessage *JUnitSkipMessage `xml:"skipped,omitempty"`
Failure *JUnitFailure `xml:"failure,omitempty"`
SystemOut string `xml:"system-out,omitempty"`
SystemErr string `xml:"system-err,omitempty"`
}
// JUnitSkipMessage contains the reason why a testcase was skipped.