test: Simplify NPD start timestamp calculation

The NPD test checks when NPD started to determine if it is needed to
check the kubelet start event. The current logic requires parsing the
journalctl logs which is quite fragile and is broken now because of
systemd changing the expected log format.

Newer versions of systemd do not print "end at" or "logs begin at" and
instead may print "No entries", which will result in the test panicking.

```
$ journalctl -u foo.service
-- No entries --
```

For units started, it will not print "end at" or "logs begin at":

```
root@ubuntu-jammy:~# journalctl -u foo.service
Feb 08 22:02:19 ubuntu-jammy systemd[1]: Started /usr/bin/sleep 1s.
Feb 08 22:02:20 ubuntu-jammy systemd[1]: foo.service: Deactivated successfully.
```

To avoid relying on log parsing which is fragile, let's instead directly
ask systemd when the NPD service started and parse the resulting
timestamp.

Signed-off-by: David Porter <david@porter.me>
This commit is contained in:
David Porter 2023-02-08 13:58:45 -08:00
parent f58f70bd57
commit 7fe371a974

View File

@ -131,44 +131,20 @@ var _ = SIGDescribe("NodeProblemDetector", func() {
framework.ExpectEqual(result.Code, 0)
gomega.Expect(result.Stdout).NotTo(gomega.ContainSubstring("node-problem-detector.service: Failed"))
// Let's assume that node problem detector has started the same time as kubelet
// We only will check for the KubeletStart if parsing of date here succeed
// This is an optimization to not ssh one more time to get kubelet's start time.
// Also we assume specific datetime format to simplify the logic
output := result.Stdout
// We only will check for the KubeletStart even if parsing of date here succeeded.
ginkgo.By(fmt.Sprintf("Check when node-problem-detector started on node %q", host))
npdStartTimeCommand := "sudo systemctl show --timestamp=utc node-problem-detector -P ActiveEnterTimestamp"
result, err = e2essh.SSH(ctx, npdStartTimeCommand, host, framework.TestContext.Provider)
framework.ExpectNoError(err)
framework.ExpectEqual(result.Code, 0)
// searching for the line "Apr 14 04:47:42 gke-cluster-1-default-pool-b1565719-eqht systemd[1]: Started Kubernetes node problem detector." and fallback to
idx := strings.Index(output, "Started Kubernetes node problem detector")
if idx != -1 {
output = output[:idx]
idx = strings.LastIndex(output, "\n")
if idx != -1 {
output = output[0:15]
}
st, err := time.Parse("Jan 02 15:04:05", output)
st = st.AddDate(time.Now().Year(), 0, 0)
if err == nil {
checkForKubeletStart = time.Since(st) < time.Hour
}
// The time format matches the systemd format.
// 'utc': 'Day YYYY-MM-DD HH:MM:SS UTC (see https://www.freedesktop.org/software/systemd/man/systemd.time.html)
st, err := time.Parse("Mon 2006-01-02 15:04:05 MST", result.Stdout)
if err != nil {
framework.Logf("Failed to parse when NPD started. Got exit code: %v and stdout: %v, error: %v. Will skip check for kubelet start event.", result.Code, result.Stdout, err)
} else {
// fallback to searching the line:
// -- Logs begin at Thu 2022-04-28 17:32:39 UTC, end at Thu 2022-04-28 17:40:05 UTC. --
idx := strings.Index(output, ", end at ")
output = output[:idx]
idx = strings.LastIndex(output, "-- Logs begin at ")
if idx != -1 {
output = output[17:]
}
st, err := time.Parse("Mon 2006-01-02 15:04:05 MST", output)
if err == nil {
checkForKubeletStart = time.Since(st) < time.Hour
}
checkForKubeletStart = time.Since(st) < time.Hour
}
cpuUsage, uptime := getCPUStat(ctx, f, host)