test/integration/logs: update benchmark support

When trying again with recent log files from the CI job, it was found that some JSON messages get split across multiple lines, both in container logs and in the systemd journal: 2022-12-21T07:09:47.914739996Z stderr F {"ts":1671606587914.691,"caller":"rest/request.go:1169","msg":"Response ... 2022-12-21T07:09:47.914984628Z stderr F 70 72 6f 78 79 10 01 1a 13 53 ... \".|\n","v":8} Note the different time stamp on the second line. That first line is long (17384 bytes). This seems to happen because the data must pass through a stream-oriented pipe and thus may get split up by the Linux kernel. The implication is that lines must get merged whenever the JSON decoder encounters an incomplete line. The benchmark loader now supports that. To simplifies this, stripping the non-JSON line prefixes must be done before using a log as test data. The updated README explains how to do that when downloading a CI job result. The amount of manual work gets reduced by committing symlinks under data to the expected location under ci-kubernetes-kind-e2e-json-logging and ignoring them when the data is not there. Support for symlinks gets removed and path/filepath is used instead of path because it has better Windows support.
2026-01-06 07:57:35 +00:00 · 2022-12-21 11:51:11 +01:00
parent 1692de5f17
commit 97a8d72a67
8 changed files with 170 additions and 28 deletions
--- a/test/integration/logs/benchmark/.gitignore
+++ b/test/integration/logs/benchmark/.gitignore
@@ -0,0 +1,10 @@
+# Created by get-logs.sh:
+/ci-kubernetes-kind-e2e-json-logging
+/data/kind-worker-kubelet.log
+/data/kube-apiserver.log
+/data/kube-controller-manager.log
+/data/kube-scheduler.log
+/data/v3/kind-worker-kubelet.log
+/data/v3/kube-apiserver.log
+/data/v3/kube-controller-manager.log
+/data/v3/kube-scheduler.log
--- a/test/integration/logs/benchmark/README.md
+++ b/test/integration/logs/benchmark/README.md
@@ -28,9 +28,29 @@ Prow job:
 - `artifacts/logs/kind-control-plane/containers`
 - `artifacts/logs/kind-*/kubelet.log`

-With sufficient credentials, `gsutil` can be used to download everything for a job with:
+With sufficient credentials, `gsutil` can be used to download everything for a job directly
+into a directory that then will be used by the benchmarks automatically:
+
 ```
-gsutil -m cp -R gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/<job ID> .
+kubernetes$ test/integration/logs/benchmark/get-logs.sh
++ dirname test/integration/logs/benchmark/get-logs.sh
+ cd test/integration/logs/benchmark
++ latest_job
++ gsutil cat gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/latest-build.txt
+ job=1618864842834186240
+ rm -rf ci-kubernetes-kind-e2e-json-logging
+ mkdir ci-kubernetes-kind-e2e-json-logging
+...
+```
+
+This sets up the `data` directory so that additional test cases are available
+(`BenchmarkEncoding/v3/kind-worker-kubelet/`,
+`BenchmarkEncoding/kube-scheduler/`, etc.).
+
+
+To clean up, use
+```
+git clean -fx test/integration/logs/benchmark
 ```

 ## Analyzing log data
--- a/test/integration/logs/benchmark/benchmark_test.go
+++ b/test/integration/logs/benchmark/benchmark_test.go
@@ -23,7 +23,6 @@ import (
 	"io"
 	"io/fs"
 	"os"
-	"path"
 	"path/filepath"
 	"regexp"
 	"strconv"
@@ -38,10 +37,11 @@ import (
 )

 func BenchmarkEncoding(b *testing.B) {
+	seen := map[string]bool{}
+
 	// Each "data/(v[0-9]/)?*.log" file is expected to contain JSON log
 	// messages. We generate one sub-benchmark for each file where logging
-	// is tested with the log level from the directory.  Symlinks can be
-	// used to test the same file with different levels.
+	// is tested with the log level from the directory.
 	if err := filepath.Walk("data", func(path string, info fs.FileInfo, err error) error {
 		if err != nil {
 			return err
@@ -53,8 +53,14 @@ func BenchmarkEncoding(b *testing.B) {
 		if err != nil {
 			return err
 		}
-		if info.Mode()&fs.ModeSymlink == 0 {
-			b.Log(path + "\n" + stats.String())
+		// Only print unique file statistics. They get shown for the
+		// first file where new statistics are encountered. The
+		// assumption here is that the there are no files with
+		// different content and exactly the same statistics.
+		statsStr := stats.String()
+		if !seen[statsStr] {
+			b.Log(path + "\n" + statsStr)
+			seen[statsStr] = true
 		}
 		b.Run(strings.TrimSuffix(strings.TrimPrefix(path, "data/"), ".log"), func(b *testing.B) {
 			// Take verbosity threshold from directory, if present.
@@ -108,7 +114,7 @@ func BenchmarkEncoding(b *testing.B) {
 				fileSizes["JSON"] = int(output) / b.N
 			})

-			b.Log(fmt.Sprintf("file sizes: %v\n", fileSizes))
+			b.Log(fmt.Sprintf("%s: file sizes: %v\n", path, fileSizes))
 		})
 		return nil
 	}); err != nil {
@@ -164,7 +170,7 @@ func benchmarkOutputFormats(b *testing.B, config loadGeneratorConfig, discard bo
 		var out *os.File
 		if !discard {
 			var err error
-			out, err = os.Create(path.Join(tmpDir, "all.log"))
+			out, err = os.Create(filepath.Join(tmpDir, "all.log"))
 			if err != nil {
 				b.Fatal(err)
 			}
@@ -177,12 +183,12 @@ func benchmarkOutputFormats(b *testing.B, config loadGeneratorConfig, discard bo
 		var out1, out2 *os.File
 		if !discard {
 			var err error
-			out1, err = os.Create(path.Join(tmpDir, "stream-1.log"))
+			out1, err = os.Create(filepath.Join(tmpDir, "stream-1.log"))
 			if err != nil {
 				b.Fatal(err)
 			}
 			defer out1.Close()
-			out2, err = os.Create(path.Join(tmpDir, "stream-2.log"))
+			out2, err = os.Create(filepath.Join(tmpDir, "stream-2.log"))
 			if err != nil {
 				b.Fatal(err)
 			}
--- a/test/integration/logs/benchmark/data/container.log
+++ b/test/integration/logs/benchmark/data/container.log
@@ -1,2 +1,2 @@
 # This is a manually created message. See https://github.com/kubernetes/kubernetes/issues/106652 for the real one.
-Nov 19 02:13:48 kind-worker2 kubelet[250]: {"ts":1637288028968.0125,"caller":"kuberuntime/kuberuntime_manager.go:902","msg":"Creating container in pod","v":0,"container":{"Name":"terminate-cmd-rpn","Image":"registry.k8s.io/e2e-test-images/busybox:1.29-2","Command":["sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; done\n"],"TerminationMessagePath":"/dev/termination-log"}}
+{"ts":1637288028968.0125,"caller":"kuberuntime/kuberuntime_manager.go:902","msg":"Creating container in pod","v":0,"container":{"Name":"terminate-cmd-rpn","Image":"registry.k8s.io/e2e-test-images/busybox:1.29-2","Command":["sh -c \nf=/restart-count/restartCount\ncount=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})\nif [ $count -eq 1 ]; then\n\texit 1\nfi\nif [ $count -eq 2 ]; then\n\texit 0\nfi\nwhile true; do sleep 1; done\n"],"TerminationMessagePath":"/dev/termination-log"}}
--- a/test/integration/logs/benchmark/data/split.log
+++ b/test/integration/logs/benchmark/data/split.log
@@ -0,0 +1,3 @@
+{"v": 0,
+"msg": "Pod status updated"}
+{"v": 0, "msg": "Pod status updated again"}
--- a/test/integration/logs/benchmark/get-logs.sh
+++ b/test/integration/logs/benchmark/get-logs.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+# Copyright 2018 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: get-logs.sh [<job ID>]
+#
+# Downloads the latest job output or the one with the specified ID
+# and prepares running benchmarks for it.
+
+set -o pipefail
+set -o errexit
+set -x
+
+cd "$(dirname "$0")"
+
+latest_job () {
+    gsutil cat gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/latest-build.txt
+}
+
+job=${1:-$(latest_job)}
+
+rm -rf ci-kubernetes-kind-e2e-json-logging
+mkdir ci-kubernetes-kind-e2e-json-logging
+gsutil -m cp -R "gs://kubernetes-jenkins/logs/ci-kubernetes-kind-e2e-json-logging/${job}/*" ci-kubernetes-kind-e2e-json-logging/
+
+for i in kube-apiserver kube-controller-manager kube-scheduler; do
+    # Before (container runtime log dump (?)):
+    #   2023-03-07T07:30:52.193301924Z stderr F {"ts":1678174252192.0676,"caller":"scheduler/schedule_one.go:81","msg":"Attempting to schedule pod","v":3,"pod":{"name":"simpletest.rc-zgd47","namespace":"gc-5422"}}
+    # After:
+    #   {"ts":1678174252192.0676,"caller":"scheduler/schedule_one.go:81","msg":"Attempting to schedule pod","v":3,"pod":{"name":"simpletest.rc-zgd47","namespace":"gc-5422"}}
+    sed -e 's/^20[^ ]* stderr . //' \
+        ci-kubernetes-kind-e2e-json-logging/artifacts/kind-control-plane/containers/$i-*.log \
+        > ci-kubernetes-kind-e2e-json-logging/$i.log;
+done
+
+# Before (systemd format):
+#   Mar 07 07:22:05 kind-control-plane kubelet[288]: {"ts":1678173725722.4487,"caller":"flag/flags.go:64","msg":"FLAG: --address=\"0.0.0.0\"\n","v":1}
+# After:
+#   {"ts":1678173725722.4487,"caller":"flag/flags.go:64","msg":"FLAG: --address=\"0.0.0.0\"\n","v":1}
+grep 'kind-worker kubelet' ci-kubernetes-kind-e2e-json-logging/artifacts/kind-worker/kubelet.log | \
+    sed -e 's;^.* kind-worker kubelet[^ ]*: ;;' > ci-kubernetes-kind-e2e-json-logging/kind-worker-kubelet.log
+
+# Create copies of the actual files, whether they already exist or not. To
+# clean up disk space, use "git clean -fx test/integration/logs/benchmark".
+copy () {
+    from="$1"
+    to="$2"
+
+    mkdir -p "$(dirname "$to")"
+    rm -f "$to"
+    cp "$from" "$to"
+}
+copy ci-kubernetes-kind-e2e-json-logging/kind-worker-kubelet.log data/kind-worker-kubelet.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-apiserver.log data/kube-apiserver.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-controller-manager.log data/kube-controller-manager.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-scheduler.log data/kube-scheduler.log
+
+copy ci-kubernetes-kind-e2e-json-logging/kind-worker-kubelet.log data/v3/kind-worker-kubelet.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-apiserver.log data/v3/kube-apiserver.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-controller-manager.log data/v3/kube-controller-manager.log
+copy ci-kubernetes-kind-e2e-json-logging/kube-scheduler.log data/v3/kube-scheduler.log
--- a/test/integration/logs/benchmark/load.go
+++ b/test/integration/logs/benchmark/load.go
@@ -29,7 +29,7 @@ import (
 	"strings"
 	"text/template"

-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/klog/v2"
 )

@@ -52,7 +52,7 @@ const (
 )

 type logStats struct {
-	TotalLines, JsonLines, ErrorMessages int
+	TotalLines, JsonLines, SplitLines, ErrorMessages int

 	ArgCounts     map[string]int
 	OtherLines    []string
@@ -73,10 +73,11 @@ var (
 			return x - y
 		},
 	}).Parse(`Total number of lines: {{.TotalLines}}
+JSON line continuation: {{.SplitLines}}
 Valid JSON messages: {{.JsonLines}} ({{percent .JsonLines .TotalLines}} of total lines)
 Error messages: {{.ErrorMessages}} ({{percent .ErrorMessages .JsonLines}} of valid JSON messages)
-Unrecognized lines: {{sub .TotalLines .JsonLines}}
-{{range .OtherLines}} {{.}}
+Unrecognized lines: {{sub (sub .TotalLines .JsonLines) .SplitLines}}
+{{range .OtherLines}} {{if gt (len .) 80}}{{slice . 0 80}}{{else}}{{.}}{{end}}
 {{end}}
 Args:
 total: {{if .ArgCounts.total}}{{.ArgCounts.total}}{{else}}0{{end}}{{if .ArgCounts.string}}
@@ -119,14 +120,28 @@ func loadLog(path string) (messages []logMessage, stats logStats, err error) {

 	stats.ArgCounts = map[string]int{}
 	scanner := bufio.NewScanner(file)
+	var buffer bytes.Buffer
 	for lineNo := 0; scanner.Scan(); lineNo++ {
+		stats.TotalLines++
 		line := scanner.Bytes()
-		msg, err := parseLine(line, &stats)
+		buffer.Write(line)
+		msg, err := parseLine(buffer.Bytes(), &stats)
 		if err != nil {
+			// JSON might have been split across multiple lines.
+			var jsonErr *json.SyntaxError
+			if errors.As(err, &jsonErr) && jsonErr.Offset > 1 {
+				// The start of the buffer was okay. Keep the
+				// data and add the next line to it.
+				stats.SplitLines++
+				continue
+			}
 			stats.OtherLines = append(stats.OtherLines, fmt.Sprintf("%d: %s", lineNo, string(line)))
+			buffer.Reset()
 			continue
 		}
+		stats.JsonLines++
 		messages = append(messages, msg)
+		buffer.Reset()
 	}

 	if err := scanner.Err(); err != nil {
@@ -136,26 +151,16 @@ func loadLog(path string) (messages []logMessage, stats logStats, err error) {
 	return
 }

-// systemd prefix:
-// Nov 19 02:08:51 kind-worker2 kubelet[250]: {"ts":1637287731687.8315,...
-//
-// kubectl (?) prefix:
-// 2021-11-19T02:08:28.475825534Z stderr F {"ts": ...
-var prefixRE = regexp.MustCompile(`^\w+ \d+ \S+ \S+ \S+: |\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z stderr . `)
-
 // String format for API structs from generated.pb.go.
 // &Container{...}
 var objectRE = regexp.MustCompile(`^&([a-zA-Z]*)\{`)

 func parseLine(line []byte, stats *logStats) (item logMessage, err error) {
-	stats.TotalLines++
-	line = prefixRE.ReplaceAll(line, nil)

 	content := map[string]interface{}{}
 	if err := json.Unmarshal(line, &content); err != nil {
-		return logMessage{}, fmt.Errorf("JSON parsing failed: %v", err)
+		return logMessage{}, fmt.Errorf("JSON parsing failed: %w", err)
 	}
-	stats.JsonLines++

 	kvs := map[string]interface{}{}
 	item.isError = true
--- a/test/integration/logs/benchmark/load_test.go
+++ b/test/integration/logs/benchmark/load_test.go
@@ -58,6 +58,31 @@ func TestData(t *testing.T) {
 				ArgCounts:  map[string]int{},
 			},
 		},
+		"data/split.log": {
+			messages: []logMessage{
+				{
+					msg: "Pod status updated",
+				},
+				{
+					msg: "Pod status updated again",
+				},
+			},
+			printf: `Pod status updated: []
+Pod status updated again: []
+`,
+			structured: `"Pod status updated"
+"Pod status updated again"
+`,
+			json: `{"msg":"Pod status updated","v":0}
+{"msg":"Pod status updated again","v":0}
+`,
+			stats: logStats{
+				TotalLines: 3,
+				SplitLines: 1,
+				JsonLines:  2,
+				ArgCounts:  map[string]int{},
+			},
+		},
 		"data/error.log": {
 			messages: []logMessage{
 				{