test/integration/logs: update benchmark support

When trying again with recent log files from the CI job, it was found that some
JSON messages get split across multiple lines, both in container logs and in
the systemd journal:

   2022-12-21T07:09:47.914739996Z stderr F {"ts":1671606587914.691,"caller":"rest/request.go:1169","msg":"Response ...
   2022-12-21T07:09:47.914984628Z stderr F 70 72  6f 78 79 10 01 1a 13 53 ... \".|\n","v":8}

Note the different time stamp on the second line. That first line is
long (17384 bytes). This seems to happen because the data must pass through a
stream-oriented pipe and thus may get split up by the Linux kernel.

The implication is that lines must get merged whenever the JSON decoder
encounters an incomplete line. The benchmark loader now supports that. To
simplify this, stripping the non-JSON line prefixes must be done before using
a log file as test data.
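The merging is implemented in the loader below with a bytes.Buffer and a
retry after a *json.SyntaxError. As a self-contained sketch of the same idea
(simplified, with the statistics bookkeeping left out):

```
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
)

func main() {
	// The first message is split across two lines, as in the CI logs.
	input := "{\"v\": 0,\n\"msg\": \"Pod status updated\"}\n" +
		"{\"v\": 0, \"msg\": \"Pod status updated again\"}\n"
	scanner := bufio.NewScanner(strings.NewReader(input))
	var buffer bytes.Buffer
	for scanner.Scan() {
		buffer.Write(scanner.Bytes())
		content := map[string]interface{}{}
		if err := json.Unmarshal(buffer.Bytes(), &content); err != nil {
			var jsonErr *json.SyntaxError
			if errors.As(err, &jsonErr) && jsonErr.Offset > 1 {
				// The buffer starts with valid JSON and merely
				// ends too early: keep it, append the next line.
				continue
			}
			// Not JSON at all: discard and move on.
			buffer.Reset()
			continue
		}
		fmt.Println("message:", content["msg"])
		buffer.Reset()
	}
}
```

Running it prints both messages, with the two fragments of the first one
merged back together.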

The updated README explains how to do that when downloading a CI job
result. The amount of manual work gets reduced by the new get-logs.sh
script: it strips the prefixes from the files downloaded under
ci-kubernetes-kind-e2e-json-logging and copies the results to the expected
locations under data, while a .gitignore entry keeps the tree clean whether
or not that data is present.

The special handling of symlinks in the benchmark gets removed, and
path/filepath is used instead of path because it has better Windows support.
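For reference, the difference between the two packages in a minimal example:
path always joins with forward slashes, while path/filepath uses the
separator of the host OS, which is what makes it correct on Windows.

```
package main

import (
	"fmt"
	"path"
	"path/filepath"
)

func main() {
	// path is for slash-separated paths (URLs etc.) on every OS:
	fmt.Println(path.Join("tmp", "all.log")) // always "tmp/all.log"

	// filepath uses the OS-specific separator:
	// "tmp/all.log" on Linux, "tmp\all.log" on Windows.
	fmt.Println(filepath.Join("tmp", "all.log"))
}
```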
Author: Patrick Ohly
Date:   2022-12-21 11:51:11 +01:00
parent  1692de5f17
commit  97a8d72a67

8 changed files with 170 additions and 28 deletions

test/integration/logs/benchmark/.gitignore (new file)

@@ -0,0 +1,10 @@
+# Created by get-logs.sh:
+/ci-kubernetes-kind-e2e-json-logging
+/data/kind-worker-kubelet.log
+/data/kube-apiserver.log
+/data/kube-controller-manager.log
+/data/kube-scheduler.log
+/data/v3/kind-worker-kubelet.log
+/data/v3/kube-apiserver.log
+/data/v3/kube-controller-manager.log
+/data/v3/kube-scheduler.log

test/integration/logs/benchmark/README.md

@@ -28,9 +28,29 @@ Prow job:
 - `artifacts/logs/kind-control-plane/containers`
 - `artifacts/logs/kind-*/kubelet.log`
 
-With sufficient credentials, `gsutil` can be used to download everything for a job with:
+With sufficient credentials, `gsutil` can be used to download everything for a job directly
+into a directory that then will be used by the benchmarks automatically:
 
 ```
-gsutil -m cp -R gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/<job ID> .
+kubernetes$ test/integration/logs/benchmark/get-logs.sh
+++ dirname test/integration/logs/benchmark/get-logs.sh
++ cd test/integration/logs/benchmark
+++ latest_job
+++ gsutil cat gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/latest-build.txt
++ job=1618864842834186240
++ rm -rf ci-kubernetes-kind-e2e-json-logging
++ mkdir ci-kubernetes-kind-e2e-json-logging
+...
 ```
 
+This sets up the `data` directory so that additional test cases are available
+(`BenchmarkEncoding/v3/kind-worker-kubelet/`,
+`BenchmarkEncoding/kube-scheduler/`, etc.).
+
+To clean up, use
+```
+git clean -fx test/integration/logs/benchmark
+```
+
 ## Analyzing log data

test/integration/logs/benchmark/benchmark_test.go

@@ -23,7 +23,6 @@ import (
 	"io"
 	"io/fs"
 	"os"
-	"path"
 	"path/filepath"
 	"regexp"
 	"strconv"
@@ -38,10 +37,11 @@ import (
 )
 
 func BenchmarkEncoding(b *testing.B) {
+	seen := map[string]bool{}
+
 	// Each "data/(v[0-9]/)?*.log" file is expected to contain JSON log
 	// messages. We generate one sub-benchmark for each file where logging
-	// is tested with the log level from the directory. Symlinks can be
-	// used to test the same file with different levels.
+	// is tested with the log level from the directory.
 	if err := filepath.Walk("data", func(path string, info fs.FileInfo, err error) error {
 		if err != nil {
 			return err
@@ -53,8 +53,14 @@ func BenchmarkEncoding(b *testing.B) {
 		if err != nil {
 			return err
 		}
-		if info.Mode()&fs.ModeSymlink == 0 {
-			b.Log(path + "\n" + stats.String())
+		// Only print unique file statistics. They get shown for the
+		// first file where new statistics are encountered. The
+		// assumption here is that there are no files with
+		// different content and exactly the same statistics.
+		statsStr := stats.String()
+		if !seen[statsStr] {
+			b.Log(path + "\n" + statsStr)
+			seen[statsStr] = true
 		}
 		b.Run(strings.TrimSuffix(strings.TrimPrefix(path, "data/"), ".log"), func(b *testing.B) {
 			// Take verbosity threshold from directory, if present.
@@ -108,7 +114,7 @@ func BenchmarkEncoding(b *testing.B) {
 				fileSizes["JSON"] = int(output) / b.N
 			})
 
-			b.Log(fmt.Sprintf("file sizes: %v\n", fileSizes))
+			b.Log(fmt.Sprintf("%s: file sizes: %v\n", path, fileSizes))
 		})
 		return nil
 	}); err != nil {
@@ -164,7 +170,7 @@ func benchmarkOutputFormats(b *testing.B, config loadGeneratorConfig, discard bool) {
 	var out *os.File
 	if !discard {
 		var err error
-		out, err = os.Create(path.Join(tmpDir, "all.log"))
+		out, err = os.Create(filepath.Join(tmpDir, "all.log"))
 		if err != nil {
 			b.Fatal(err)
 		}
@@ -177,12 +183,12 @@ func benchmarkOutputFormats(b *testing.B, config loadGeneratorConfig, discard bool) {
 	var out1, out2 *os.File
 	if !discard {
 		var err error
-		out1, err = os.Create(path.Join(tmpDir, "stream-1.log"))
+		out1, err = os.Create(filepath.Join(tmpDir, "stream-1.log"))
 		if err != nil {
 			b.Fatal(err)
 		}
 		defer out1.Close()
-		out2, err = os.Create(path.Join(tmpDir, "stream-2.log"))
+		out2, err = os.Create(filepath.Join(tmpDir, "stream-2.log"))
 		if err != nil {
 			b.Fatal(err)
 		}

test/integration/logs/benchmark/data/container.log

@@ -1,2 +1,2 @@
 # This is a manually created message. See https://github.com/kubernetes/kubernetes/issues/106652 for the real one.
-Nov 19 02:13:48 kind-worker2 kubelet[250]: {"ts":1637288028968.0125,"caller":"kuberuntime/kuberuntime_manager.go:902","msg":"Creating container in pod","v":0,"container":{"Name":"terminate-cmd-rpn","Image":"registry.k8s.io/e2e-test-images/busybox:1.29-2","Command":["sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; done\n"],"TerminationMessagePath":"/dev/termination-log"}}
+{"ts":1637288028968.0125,"caller":"kuberuntime/kuberuntime_manager.go:902","msg":"Creating container in pod","v":0,"container":{"Name":"terminate-cmd-rpn","Image":"registry.k8s.io/e2e-test-images/busybox:1.29-2","Command":["sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; done\n"],"TerminationMessagePath":"/dev/termination-log"}}

test/integration/logs/benchmark/data/split.log (new file)

@@ -0,0 +1,3 @@
+{"v": 0,
+"msg": "Pod status updated"}
+{"v": 0, "msg": "Pod status updated again"}

test/integration/logs/benchmark/get-logs.sh (new file)

@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+# Copyright 2018 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: get-logs.sh [<job ID>]
+#
+# Downloads the latest job output or the one with the specified ID
+# and prepares running benchmarks for it.
+
+set -o pipefail
+set -o errexit
+set -x
+
+cd "$(dirname "$0")"
+
+latest_job () {
+    gsutil cat gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/latest-build.txt
+}
+
+job=${1:-$(latest_job)}
+rm -rf ci-kubernetes-kind-e2e-json-logging
+mkdir ci-kubernetes-kind-e2e-json-logging
+gsutil -m cp -R "gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/${job}/*" ci-kubernetes-kind-e2e-json-logging/
+
+for i in kube-apiserver kube-controller-manager kube-scheduler; do
+    # Before (container runtime log dump (?)):
+    # 2023-03-07T07:30:52.193301924Z stderr F {"ts":1678174252192.0676,"caller":"scheduler/schedule_one.go:81","msg":"Attempting to schedule pod","v":3,"pod":{"name":"simpletest.rc-zgd47","namespace":"gc-5422"}}
+    # After:
+    # {"ts":1678174252192.0676,"caller":"scheduler/schedule_one.go:81","msg":"Attempting to schedule pod","v":3,"pod":{"name":"simpletest.rc-zgd47","namespace":"gc-5422"}}
+    sed -e 's/^20[^ ]* stderr . //' \
+        ci-kubernetes-kind-e2e-json-logging/artifacts/kind-control-plane/containers/$i-*.log \
+        > ci-kubernetes-kind-e2e-json-logging/$i.log;
+done
+
+# Before (systemd format):
+# Mar 07 07:22:05 kind-control-plane kubelet[288]: {"ts":1678173725722.4487,"caller":"flag/flags.go:64","msg":"FLAG: --address=\"0.0.0.0\"\n","v":1}
+# After:
+# {"ts":1678173725722.4487,"caller":"flag/flags.go:64","msg":"FLAG: --address=\"0.0.0.0\"\n","v":1}
+grep 'kind-worker kubelet' ci-kubernetes-kind-e2e-json-logging/artifacts/kind-worker/kubelet.log | \
+    sed -e 's;^.* kind-worker kubelet[^ ]*: ;;' > ci-kubernetes-kind-e2e-json-logging/kind-worker-kubelet.log
+
+# Create copies of the actual files, whether they already exist or not. To
+# clean up disk space, use "git clean -fx test/integration/logs/benchmark".
+copy () {
+    from="$1"
+    to="$2"
+    mkdir -p "$(dirname "$to")"
+    rm -f "$to"
+    cp "$from" "$to"
+}
+copy ci-kubernetes-kind-e2e-json-logging/kind-worker-kubelet.log data/kind-worker-kubelet.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-apiserver.log data/kube-apiserver.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-controller-manager.log data/kube-controller-manager.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-scheduler.log data/kube-scheduler.log
+copy ci-kubernetes-kind-e2e-json-logging/kind-worker-kubelet.log data/v3/kind-worker-kubelet.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-apiserver.log data/v3/kube-apiserver.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-controller-manager.log data/v3/kube-controller-manager.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-scheduler.log data/v3/kube-scheduler.log

test/integration/logs/benchmark/load.go

@@ -29,7 +29,7 @@ import (
 	"strings"
 	"text/template"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/klog/v2"
 )
@@ -52,7 +52,7 @@
 )
 
 type logStats struct {
-	TotalLines, JsonLines, ErrorMessages int
+	TotalLines, JsonLines, SplitLines, ErrorMessages int
 
 	ArgCounts  map[string]int
 	OtherLines []string
@@ -73,10 +73,11 @@
 		return x - y
 	},
 }).Parse(`Total number of lines: {{.TotalLines}}
+JSON line continuation: {{.SplitLines}}
 Valid JSON messages: {{.JsonLines}} ({{percent .JsonLines .TotalLines}} of total lines)
 Error messages: {{.ErrorMessages}} ({{percent .ErrorMessages .JsonLines}} of valid JSON messages)
-Unrecognized lines: {{sub .TotalLines .JsonLines}}
-{{range .OtherLines}} {{.}}
+Unrecognized lines: {{sub (sub .TotalLines .JsonLines) .SplitLines}}
+{{range .OtherLines}} {{if gt (len .) 80}}{{slice . 0 80}}{{else}}{{.}}{{end}}
 {{end}}
 Args:
  total: {{if .ArgCounts.total}}{{.ArgCounts.total}}{{else}}0{{end}}{{if .ArgCounts.string}}
@@ -119,14 +120,28 @@ func loadLog(path string) (messages []logMessage, stats logStats, err error) {
 	stats.ArgCounts = map[string]int{}
 
 	scanner := bufio.NewScanner(file)
+	var buffer bytes.Buffer
 	for lineNo := 0; scanner.Scan(); lineNo++ {
+		stats.TotalLines++
 		line := scanner.Bytes()
-		msg, err := parseLine(line, &stats)
+		buffer.Write(line)
+		msg, err := parseLine(buffer.Bytes(), &stats)
 		if err != nil {
+			// JSON might have been split across multiple lines.
+			var jsonErr *json.SyntaxError
+			if errors.As(err, &jsonErr) && jsonErr.Offset > 1 {
+				// The start of the buffer was okay. Keep the
				// data and add the next line to it.
+				stats.SplitLines++
+				continue
+			}
 			stats.OtherLines = append(stats.OtherLines, fmt.Sprintf("%d: %s", lineNo, string(line)))
+			buffer.Reset()
 			continue
 		}
+		stats.JsonLines++
 		messages = append(messages, msg)
+		buffer.Reset()
 	}
 
 	if err := scanner.Err(); err != nil {
@@ -136,26 +151,16 @@ func loadLog(path string) (messages []logMessage, stats logStats, err error) {
 	return
 }
 
-// systemd prefix:
-// Nov 19 02:08:51 kind-worker2 kubelet[250]: {"ts":1637287731687.8315,...
-//
-// kubectl (?) prefix:
-// 2021-11-19T02:08:28.475825534Z stderr F {"ts": ...
-var prefixRE = regexp.MustCompile(`^\w+ \d+ \S+ \S+ \S+: |\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z stderr . `)
-
 // String format for API structs from generated.pb.go.
 // &Container{...}
 var objectRE = regexp.MustCompile(`^&([a-zA-Z]*)\{`)
 
 func parseLine(line []byte, stats *logStats) (item logMessage, err error) {
-	stats.TotalLines++
-	line = prefixRE.ReplaceAll(line, nil)
-
 	content := map[string]interface{}{}
 	if err := json.Unmarshal(line, &content); err != nil {
-		return logMessage{}, fmt.Errorf("JSON parsing failed: %v", err)
+		return logMessage{}, fmt.Errorf("JSON parsing failed: %w", err)
 	}
-	stats.JsonLines++
 
 	kvs := map[string]interface{}{}
 	item.isError = true
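A note on the jsonErr.Offset > 1 check in the loader above: encoding/json
reports a syntax error at offset 1 when already the first byte cannot start a
JSON value, whereas valid JSON that merely got truncated fails at the end of
the input. A small probe showing the distinction (the sample inputs are made
up, modeled on the logs quoted in the commit message):

```
package main

import (
	"encoding/json"
	"errors"
	"fmt"
)

// offset returns where json.Unmarshal gave up, or 0 on success.
func offset(data string) int64 {
	var content map[string]interface{}
	err := json.Unmarshal([]byte(data), &content)
	var jsonErr *json.SyntaxError
	if errors.As(err, &jsonErr) {
		return jsonErr.Offset
	}
	return 0
}

func main() {
	// Not JSON at all: the error is at the very first byte.
	fmt.Println(offset(`Nov 19 02:08:51 kind-worker2 kubelet[250]: ...`)) // 1

	// Truncated JSON: the error is at the end of the input, so the
	// loader keeps the buffer and appends the next line to it.
	fmt.Println(offset(`{"ts":1671606587914.691,"msg":"Response`)) // 39
}
```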

test/integration/logs/benchmark/load_test.go

@@ -58,6 +58,31 @@ func TestData(t *testing.T) {
 			ArgCounts: map[string]int{},
 		},
 	},
+	"data/split.log": {
+		messages: []logMessage{
+			{
+				msg: "Pod status updated",
+			},
+			{
+				msg: "Pod status updated again",
+			},
+		},
+		printf: `Pod status updated: []
+Pod status updated again: []
+`,
+		structured: `"Pod status updated"
+"Pod status updated again"
+`,
+		json: `{"msg":"Pod status updated","v":0}
+{"msg":"Pod status updated again","v":0}
+`,
+		stats: logStats{
+			TotalLines: 3,
+			SplitLines: 1,
+			JsonLines:  2,
+			ArgCounts:  map[string]int{},
+		},
+	},
 	"data/error.log": {
 		messages: []logMessage{
 			{
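For the record, the expected statistics for data/split.log follow directly
from the new accounting: the file has 3 physical lines, the first two get
merged into one message (1 line continuation), which leaves 2 valid JSON
messages and, per the updated template, 3 - 2 - 1 = 0 unrecognized lines.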