From 7fe371a97450c3f0732d90bc2387c6e2b4c7b20b Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 8 Feb 2023 13:58:45 -0800 Subject: [PATCH] test: Simplify NPD start timestamp calculation The NPD test checks when NPD started to determine if it is needed to check the kubelet start event. The current logic requires parsing the journalctl logs which is quite fragile and is broken now because of systemd changing the expected log format. Newer versions of systemd do not print "end at" or "logs begin at" and instead may print "No entries", which will result in the test panicking. ``` $ journalctl -u foo.service -- No entries -- ``` For units started, it will not print "end at" or "logs begin at": ``` root@ubuntu-jammy:~# journalctl -u foo.service Feb 08 22:02:19 ubuntu-jammy systemd[1]: Started /usr/bin/sleep 1s. Feb 08 22:02:20 ubuntu-jammy systemd[1]: foo.service: Deactivated successfully. ``` To avoid relying on log parsing which is fragile, let's instead directly ask systemd when the NPD service started and parse the resulting timestamp. Signed-off-by: David Porter --- test/e2e/node/node_problem_detector.go | 48 +++++++------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/test/e2e/node/node_problem_detector.go b/test/e2e/node/node_problem_detector.go index af5e7914e1f..a07449fa267 100644 --- a/test/e2e/node/node_problem_detector.go +++ b/test/e2e/node/node_problem_detector.go @@ -131,44 +131,20 @@ var _ = SIGDescribe("NodeProblemDetector", func() { framework.ExpectEqual(result.Code, 0) gomega.Expect(result.Stdout).NotTo(gomega.ContainSubstring("node-problem-detector.service: Failed")) - // Let's assume that node problem detector has started the same time as kubelet - // We only will check for the KubeletStart if parsing of date here succeed - // This is an optimization to not ssh one more time to get kubelet's start time. - // Also we assume specific datetime format to simplify the logic - output := result.Stdout + // We only will check for the KubeletStart even if parsing of date here succeeded. + ginkgo.By(fmt.Sprintf("Check when node-problem-detector started on node %q", host)) + npdStartTimeCommand := "sudo systemctl show --timestamp=utc node-problem-detector -P ActiveEnterTimestamp" + result, err = e2essh.SSH(ctx, npdStartTimeCommand, host, framework.TestContext.Provider) + framework.ExpectNoError(err) + framework.ExpectEqual(result.Code, 0) - // searching for the line "Apr 14 04:47:42 gke-cluster-1-default-pool-b1565719-eqht systemd[1]: Started Kubernetes node problem detector." and fallback to - idx := strings.Index(output, "Started Kubernetes node problem detector") - if idx != -1 { - output = output[:idx] - idx = strings.LastIndex(output, "\n") - - if idx != -1 { - output = output[0:15] - } - - st, err := time.Parse("Jan 02 15:04:05", output) - st = st.AddDate(time.Now().Year(), 0, 0) - - if err == nil { - checkForKubeletStart = time.Since(st) < time.Hour - } + // The time format matches the systemd format. + // 'utc': 'Day YYYY-MM-DD HH:MM:SS UTC (see https://www.freedesktop.org/software/systemd/man/systemd.time.html) + st, err := time.Parse("Mon 2006-01-02 15:04:05 MST", result.Stdout) + if err != nil { + framework.Logf("Failed to parse when NPD started. Got exit code: %v and stdout: %v, error: %v. Will skip check for kubelet start event.", result.Code, result.Stdout, err) } else { - // fallback to searching the line: - // -- Logs begin at Thu 2022-04-28 17:32:39 UTC, end at Thu 2022-04-28 17:40:05 UTC. -- - idx := strings.Index(output, ", end at ") - - output = output[:idx] - idx = strings.LastIndex(output, "-- Logs begin at ") - if idx != -1 { - output = output[17:] - } - - st, err := time.Parse("Mon 2006-01-02 15:04:05 MST", output) - - if err == nil { - checkForKubeletStart = time.Since(st) < time.Hour - } + checkForKubeletStart = time.Since(st) < time.Hour } cpuUsage, uptime := getCPUStat(ctx, f, host)