From d12696c20fd3dd68b8021c813a460818851b9b0b Mon Sep 17 00:00:00 2001 From: Aravindh Puthiyaparambil Date: Thu, 23 Feb 2023 13:55:07 -0800 Subject: [PATCH] kubelet: Expose simple journald and Get-WinEvent shims on the logs endpoint Provide an administrator a streaming view of journal logs on Linux systems using journalctl, and event logs on Windows systems using the Get-WinEvent PowerShell cmdlet without them having to implement a client side reader. Only available to cluster admins. The implementation for journald on Linux was originally done by Clayton Coleman. Introduce a heuristics approach to query logs The logs query for node objects will follow a heuristics approach when asked to query for logs from a service. If asked to get the logs from a service foobar, it will first check if foobar logs to the native OS service log provider. If unable to get logs from these, it will attempt to get logs from /var/foobar, /var/log/foobar.log or /var/log/foobar/foobar.log in that order. The logs sub-command can also directly serve a file if the query looks like a file. Co-authored-by: Clayton Coleman Co-authored-by: Christian Glombek --- go.mod | 2 +- pkg/kubelet/kubelet.go | 40 +- pkg/kubelet/kubelet_server_journal.go | 444 ++++++++++++++++++ pkg/kubelet/kubelet_server_journal_linux.go | 74 +++ pkg/kubelet/kubelet_server_journal_others.go | 34 ++ pkg/kubelet/kubelet_server_journal_test.go | 215 +++++++++ pkg/kubelet/kubelet_server_journal_windows.go | 86 ++++ pkg/kubelet/server/server.go | 25 +- 8 files changed, 908 insertions(+), 12 deletions(-) create mode 100644 pkg/kubelet/kubelet_server_journal.go create mode 100644 pkg/kubelet/kubelet_server_journal_linux.go create mode 100644 pkg/kubelet/kubelet_server_journal_others.go create mode 100644 pkg/kubelet/kubelet_server_journal_test.go create mode 100644 pkg/kubelet/kubelet_server_journal_windows.go diff --git a/go.mod b/go.mod index aac1b9396ff..e982b5dfcbd 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/coreos/go-oidc v2.1.0+incompatible github.com/coreos/go-systemd/v22 v22.4.0 github.com/cpuguy83/go-md2man/v2 v2.0.2 + github.com/cyphar/filepath-securejoin v0.2.3 github.com/davecgh/go-spew v1.1.1 github.com/docker/distribution v2.8.1+incompatible github.com/docker/go-units v0.5.0 @@ -153,7 +154,6 @@ require ( github.com/containerd/ttrpc v1.1.0 // indirect github.com/coredns/caddy v1.1.0 // indirect github.com/coreos/go-semver v0.3.0 // indirect - github.com/cyphar/filepath-securejoin v0.2.3 // indirect github.com/daviddengcn/go-colortext v1.0.0 // indirect github.com/dustin/go-humanize v1.0.0 // indirect github.com/euank/go-kmsg-parser v2.0.0+incompatible // indirect diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 302c70df015..59841f89525 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -31,14 +31,12 @@ import ( "sync/atomic" "time" - "github.com/opencontainers/selinux/go-selinux" - - "k8s.io/client-go/informers" - cadvisorapi "github.com/google/cadvisor/info/v1" libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" + "github.com/opencontainers/selinux/go-selinux" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "k8s.io/client-go/informers" "k8s.io/mount-utils" "k8s.io/utils/integer" @@ -1509,7 +1507,39 @@ func (kl *Kubelet) initializeRuntimeDependentModules() { func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) { ctx := context.Background() if kl.logServer == nil { - kl.logServer = http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/"))) + file := http.FileServer(http.Dir(nodeLogDir)) + if utilfeature.DefaultFeatureGate.Enabled(features.NodeLogQuery) && kl.kubeletConfiguration.EnableSystemLogQuery { + kl.logServer = http.StripPrefix("/logs/", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + if nlq, errs := newNodeLogQuery(req.URL.Query()); len(errs) > 0 { + http.Error(w, errs.ToAggregate().Error(), http.StatusBadRequest) + return + } else if nlq != nil { + if req.URL.Path != "/" && req.URL.Path != "" { + http.Error(w, "path not allowed in query mode", http.StatusNotAcceptable) + return + } + if errs := nlq.validate(); len(errs) > 0 { + http.Error(w, errs.ToAggregate().Error(), http.StatusNotAcceptable) + return + } + // Validation ensures that the request does not query services and files at the same time + if len(nlq.Services) > 0 { + journal.ServeHTTP(w, req) + return + } + // Validation ensures that the request does not explicitly query multiple files at the same time + if len(nlq.Files) == 1 { + // Account for the \ being used on Windows clients + req.URL.Path = filepath.ToSlash(nlq.Files[0]) + } + } + // Fall back in case the caller is directly trying to query a file + // Example: kubectl get --raw /api/v1/nodes/$name/proxy/logs/foo.log + file.ServeHTTP(w, req) + })) + } else { + kl.logServer = http.StripPrefix("/logs/", file) + } } if kl.kubeClient == nil { klog.InfoS("No API server defined - no node status update will be sent") diff --git a/pkg/kubelet/kubelet_server_journal.go b/pkg/kubelet/kubelet_server_journal.go new file mode 100644 index 00000000000..bf3114519c6 --- /dev/null +++ b/pkg/kubelet/kubelet_server_journal.go @@ -0,0 +1,444 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "compress/gzip" + "context" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/exec" + "reflect" + "regexp" + "regexp/syntax" + "runtime" + "strconv" + "strings" + "time" + + securejoin "github.com/cyphar/filepath-securejoin" + + utilvalidation "k8s.io/apimachinery/pkg/util/validation" + "k8s.io/apimachinery/pkg/util/validation/field" +) + +const ( + dateLayout = "2006-1-2 15:4:5" + maxTailLines = 100000 + maxServiceLength = 256 + maxServices = 4 + nodeLogDir = "/var/log/" +) + +var ( + journal = journalServer{} + // The set of known safe characters to pass to journalctl / GetWinEvent flags - only add to this list if the + // character cannot be used to create invalid sequences. This is intended as a broad defense against malformed + // input that could cause an escape. + reServiceNameUnsafeCharacters = regexp.MustCompile(`[^a-zA-Z\-_0-9@]+`) +) + +// journalServer returns text output from the OS specific service logger to view +// from the client. It runs with the privileges of the calling process +// (the kubelet) and should only be allowed to be invoked by a root user. +type journalServer struct{} + +// ServeHTTP translates HTTP query parameters into arguments to be passed +// to journalctl on the current system. It supports content-encoding of +// gzip to reduce total content size. +func (journalServer) ServeHTTP(w http.ResponseWriter, req *http.Request) { + var out io.Writer = w + + nlq, errs := newNodeLogQuery(req.URL.Query()) + if len(errs) > 0 { + http.Error(w, errs.ToAggregate().Error(), http.StatusBadRequest) + return + } + + // TODO: Also set a response header that indicates how the request's query was resolved, + // e.g. "kube-log-source: journal://foobar?arg1=value" or "kube-log-source: file:///var/log/foobar.log" + w.Header().Set("Content-Type", "text/plain;charset=UTF-8") + if req.Header.Get("Accept-Encoding") == "gzip" { + w.Header().Set("Content-Encoding", "gzip") + + gz, err := gzip.NewWriterLevel(out, gzip.BestSpeed) + if err != nil { + fmt.Fprintf(w, "\nfailed to get gzip writer: %v\n", err) + return + } + defer gz.Close() + out = gz + } + nlq.Copy(out) +} + +// nodeLogQuery encapsulates the log query request +type nodeLogQuery struct { + // Services are the list of services to be queried + Services []string + // Files are the list of files + Files []string + options +} + +// options encapsulates the query options for services +type options struct { + // SinceTime is an RFC3339 timestamp from which to show logs. + SinceTime *time.Time + // UntilTime is an RFC3339 timestamp until which to show logs. + UntilTime *time.Time + // TailLines is used to retrieve the specified number of lines (not more than 100k) from the end of the log. + // Support for this is implementation specific and only available for service logs. + TailLines *int + // Boot show messages from a specific boot. Allowed values are less than 1. Passing an invalid boot offset will fail + // retrieving logs and return an error. Support for this is implementation specific + Boot *int + // Pattern filters log entries by the provided regex pattern. On Linux nodes, this pattern will be read as a + // PCRE2 regex, on Windows nodes it will be read as a PowerShell regex. Support for this is implementation specific. + Pattern string +} + +// newNodeLogQuery parses query values and converts all known options into nodeLogQuery +func newNodeLogQuery(query url.Values) (*nodeLogQuery, field.ErrorList) { + allErrs := field.ErrorList{} + var nlq nodeLogQuery + var err error + + queries, ok := query["query"] + if len(queries) > 0 { + for _, q := range queries { + // The presence of / or \ is a hint that the query is for a log file. If the query is for foo.log without a + // slash prefix, the heuristics will still return the file contents. + if strings.ContainsAny(q, `/\`) { + nlq.Files = append(nlq.Files, q) + } else if strings.TrimSpace(q) != "" { // Prevent queries with just spaces + nlq.Services = append(nlq.Services, q) + } + } + } + + // Prevent specifying an empty or blank space query. + // Example: kubectl get --raw /api/v1/nodes/$node/proxy/logs?query=" " + if ok && (len(nlq.Files) == 0 && len(nlq.Services) == 0) { + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), queries, "query cannot be empty")) + } + + var sinceTime time.Time + sinceTimeValue := query.Get("sinceTime") + if len(sinceTimeValue) > 0 { + sinceTime, err = time.Parse(time.RFC3339, sinceTimeValue) + if err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("sinceTime"), sinceTimeValue, "invalid time format")) + } else { + nlq.SinceTime = &sinceTime + } + } + + var untilTime time.Time + untilTimeValue := query.Get("untilTime") + if len(untilTimeValue) > 0 { + untilTime, err = time.Parse(time.RFC3339, untilTimeValue) + if err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("untilTime"), untilTimeValue, "invalid time format")) + } else { + nlq.UntilTime = &untilTime + } + } + + var boot int + bootValue := query.Get("boot") + if len(bootValue) > 0 { + boot, err = strconv.Atoi(bootValue) + if err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("boot"), bootValue, err.Error())) + } else { + nlq.Boot = &boot + } + } + + var tailLines int + tailLinesValue := query.Get("tailLines") + if len(tailLinesValue) > 0 { + tailLines, err = strconv.Atoi(tailLinesValue) + if err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("tailLines"), tailLinesValue, err.Error())) + } else { + nlq.TailLines = &tailLines + } + } + + pattern := query.Get("pattern") + if len(pattern) > 0 { + nlq.Pattern = pattern + } + + if len(allErrs) > 0 { + return nil, allErrs + } + + if reflect.DeepEqual(nlq, nodeLogQuery{}) { + return nil, allErrs + } + + return &nlq, allErrs +} + +func validateServices(services []string) field.ErrorList { + allErrs := field.ErrorList{} + + for _, s := range services { + if err := safeServiceName(s); err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), s, err.Error())) + } + } + + if len(services) > maxServices { + allErrs = append(allErrs, field.TooMany(field.NewPath("query"), len(services), maxServices)) + } + return allErrs +} + +func (n *nodeLogQuery) validate() field.ErrorList { + allErrs := validateServices(n.Services) + switch { + case len(n.Files) == 0 && len(n.Services) == 0: + allErrs = append(allErrs, field.Required(field.NewPath("query"), "cannot be empty with options")) + case len(n.Files) > 0 && len(n.Services) > 0: + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), fmt.Sprintf("%v, %v", n.Files, n.Services), + "cannot specify a file and service")) + case len(n.Files) > 1: + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), n.Files, "cannot specify more than one file")) + case len(n.Files) == 1 && n.options != (options{}): + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), n.Files, "cannot specify file with options")) + case len(n.Files) == 1: + if fullLogFilename, err := securejoin.SecureJoin(nodeLogDir, n.Files[0]); err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), n.Files, err.Error())) + } else if _, err := os.Stat(fullLogFilename); err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("query"), n.Files, err.Error())) + } + } + + if n.SinceTime != nil && n.UntilTime != nil && (n.SinceTime.After(*n.UntilTime)) { + allErrs = append(allErrs, field.Invalid(field.NewPath("untilTime"), n.UntilTime, "must be after `sinceTime`")) + } + + if n.Boot != nil && runtime.GOOS == "windows" { + allErrs = append(allErrs, field.Invalid(field.NewPath("boot"), *n.Boot, "boot is not supported on Windows")) + } + + if n.Boot != nil && *n.Boot > 0 { + allErrs = append(allErrs, field.Invalid(field.NewPath("boot"), *n.Boot, "must be less than 1")) + } + + if n.TailLines != nil { + if err := utilvalidation.IsInRange((int)(*n.TailLines), 0, maxTailLines); err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("tailLines"), *n.TailLines, err[0])) + } + } + + if _, err := syntax.Parse(n.Pattern, syntax.Perl); err != nil { + allErrs = append(allErrs, field.Invalid(field.NewPath("pattern"), n.Pattern, err.Error())) + } + + return allErrs +} + +// Copy streams the contents of the OS specific logging command executed with the current args to the provided +// writer. If an error occurs a line is written to the output. +func (n *nodeLogQuery) Copy(w io.Writer) { + // set the deadline to the maximum across both runs + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(30*time.Second)) + defer cancel() + boot := int(0) + if n.Boot != nil { + boot = *n.Boot + } + n.copyForBoot(ctx, w, boot) +} + +// copyForBoot invokes the OS specific logging command with the provided args +func (n *nodeLogQuery) copyForBoot(ctx context.Context, w io.Writer, previousBoot int) { + if ctx.Err() != nil { + return + } + nativeLoggers, fileLoggers := n.splitNativeVsFileLoggers(ctx) + if len(nativeLoggers) > 0 { + n.copyServiceLogs(ctx, w, nativeLoggers, previousBoot) + } + + if len(fileLoggers) > 0 && n.options != (options{}) { + fmt.Fprintf(w, "\noptions present and query resolved to log files for %v\ntry without specifying options\n", + fileLoggers) + return + } + + if len(fileLoggers) > 0 { + copyFileLogs(ctx, w, fileLoggers) + } +} + +// splitNativeVsFileLoggers checks if each service logs to native OS logs or to a file and returns a list of services +// that log natively vs maybe to a file +func (n *nodeLogQuery) splitNativeVsFileLoggers(ctx context.Context) ([]string, []string) { + var nativeLoggers []string + var fileLoggers []string + + for _, service := range n.Services { + // Check the journalctl output to figure if the service is using journald or not. This is not needed in the + // Get-WinEvent case as the command returns an error if a service is not logging to the Application provider. + if checkForNativeLogger(ctx, service) { + nativeLoggers = append(nativeLoggers, service) + } else { + fileLoggers = append(fileLoggers, service) + } + } + return nativeLoggers, fileLoggers +} + +// copyServiceLogs invokes journalctl or Get-WinEvent with the provided args. Note that +// services are explicitly passed here to account for the heuristics. +func (n *nodeLogQuery) copyServiceLogs(ctx context.Context, w io.Writer, services []string, previousBoot int) { + cmdStr, args, err := getLoggingCmd(n, services) + if err != nil { + fmt.Fprintf(w, "\nfailed to get logging cmd: %v\n", err) + return + } + cmd := exec.CommandContext(ctx, cmdStr, args...) + cmd.Stdout = w + cmd.Stderr = w + + if err := cmd.Run(); err != nil { + if _, ok := err.(*exec.ExitError); ok { + return + } + if previousBoot == 0 { + fmt.Fprintf(w, "\nerror: journal output not available\n") + } + } +} + +// copyFileLogs loops over all the services and attempts to collect the file logs of each service +func copyFileLogs(ctx context.Context, w io.Writer, services []string) { + if ctx.Err() != nil { + fmt.Fprintf(w, "\ncontext error: %v\n", ctx.Err()) + return + } + + for _, service := range services { + heuristicsCopyFileLogs(ctx, w, service) + } +} + +// heuristicsCopyFileLogs attempts to collect logs from either +// /var/log/service +// /var/log/service.log or +// /var/log/service/service.log or +// in that order stopping on first success. +func heuristicsCopyFileLogs(ctx context.Context, w io.Writer, service string) { + logFileNames := [3]string{ + fmt.Sprintf("%s", service), + fmt.Sprintf("%s.log", service), + fmt.Sprintf("%s/%s.log", service, service), + } + + var err error + for _, logFileName := range logFileNames { + var logFile string + logFile, err = securejoin.SecureJoin(nodeLogDir, logFileName) + if err != nil { + break + } + err = heuristicsCopyFileLog(ctx, w, logFile) + if err == nil { + break + } else if errors.Is(err, os.ErrNotExist) { + continue + } else { + break + } + } + + if err != nil { + // If the last error was file not found it implies that no log file was found for the service + if errors.Is(err, os.ErrNotExist) { + fmt.Fprintf(w, "\nlog not found for %s\n", service) + return + } + fmt.Fprintf(w, "\nerror getting log for %s: %v\n", service, err) + } +} + +// readerCtx is the interface that wraps io.Reader with a context +type readerCtx struct { + ctx context.Context + io.Reader +} + +func (r *readerCtx) Read(p []byte) (n int, err error) { + if err := r.ctx.Err(); err != nil { + return 0, err + } + return r.Reader.Read(p) +} + +// newReaderCtx gets a context-aware io.Reader +func newReaderCtx(ctx context.Context, r io.Reader) io.Reader { + return &readerCtx{ + ctx: ctx, + Reader: r, + } +} + +// heuristicsCopyFileLog returns the contents of the given logFile +func heuristicsCopyFileLog(ctx context.Context, w io.Writer, logFile string) error { + fInfo, err := os.Stat(logFile) + if err != nil { + return err + } + // This is to account for the heuristics where logs for service foo + // could be in /var/log/foo/ + if fInfo.IsDir() { + return os.ErrNotExist + } + + f, err := os.Open(logFile) + if err != nil { + return err + } + defer f.Close() + + if _, err := io.Copy(w, newReaderCtx(ctx, f)); err != nil { + return err + } + return nil +} + +func safeServiceName(s string) error { + // Max length of a service name is 256 across supported OSes + if len(s) > maxServiceLength { + return fmt.Errorf("length must be less than 100") + } + + if reServiceNameUnsafeCharacters.MatchString(s) { + return fmt.Errorf("input contains unsupported characters") + } + return nil +} diff --git a/pkg/kubelet/kubelet_server_journal_linux.go b/pkg/kubelet/kubelet_server_journal_linux.go new file mode 100644 index 00000000000..29f98214713 --- /dev/null +++ b/pkg/kubelet/kubelet_server_journal_linux.go @@ -0,0 +1,74 @@ +//go:build linux + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "context" + "fmt" + "os/exec" + "strings" +) + +// getLoggingCmd returns the journalctl cmd and arguments for the given nodeLogQuery and boot. Note that +// services are explicitly passed here to account for the heuristics +func getLoggingCmd(n *nodeLogQuery, services []string) (string, []string, error) { + args := []string{ + "--utc", + "--no-pager", + "--output=short-precise", + } + if n.SinceTime != nil { + args = append(args, fmt.Sprintf("--since=%s", n.SinceTime.Format(dateLayout))) + } + if n.UntilTime != nil { + args = append(args, fmt.Sprintf("--until=%s", n.SinceTime.Format(dateLayout))) + } + if n.TailLines != nil { + args = append(args, "--pager-end", fmt.Sprintf("--lines=%d", *n.TailLines)) + } + for _, service := range services { + if len(service) > 0 { + args = append(args, "--unit="+service) + } + } + if len(n.Pattern) > 0 { + args = append(args, "--grep="+n.Pattern) + } + + if n.Boot != nil { + args = append(args, "--boot", fmt.Sprintf("%d", *n.Boot)) + } + + return "journalctl", args, nil +} + +// checkForNativeLogger checks journalctl output for a service +func checkForNativeLogger(ctx context.Context, service string) bool { + // This will return all the journald units + cmd := exec.CommandContext(ctx, "journalctl", []string{"--field", "_SYSTEMD_UNIT"}...) + output, err := cmd.CombinedOutput() + if err != nil { + // Returning false to allow checking if the service is logging to a file + return false + } + + // journalctl won't return an error if we try to fetch logs for a non-existent service, + // hence we search for it in the list of services known to journalctl + return strings.Contains(string(output), service+".service") +} diff --git a/pkg/kubelet/kubelet_server_journal_others.go b/pkg/kubelet/kubelet_server_journal_others.go new file mode 100644 index 00000000000..2f9e0ecb1a5 --- /dev/null +++ b/pkg/kubelet/kubelet_server_journal_others.go @@ -0,0 +1,34 @@ +//go:build !linux && !windows + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "context" + "errors" +) + +// getLoggingCmd on unsupported operating systems returns the echo command and a warning message (as strings) +func getLoggingCmd(n *nodeLogQuery, services []string) (string, []string, error) { + return "", []string{}, errors.New("Operating System Not Supported") +} + +// checkForNativeLogger on unsupported operating systems returns false +func checkForNativeLogger(ctx context.Context, service string) bool { + return false +} diff --git a/pkg/kubelet/kubelet_server_journal_test.go b/pkg/kubelet/kubelet_server_journal_test.go new file mode 100644 index 00000000000..e431eaaf2e0 --- /dev/null +++ b/pkg/kubelet/kubelet_server_journal_test.go @@ -0,0 +1,215 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "net/url" + "reflect" + "runtime" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" +) + +func Test_getLoggingCmd(t *testing.T) { + tests := []struct { + name string + args nodeLogQuery + wantLinux []string + wantWindows []string + wantOtherOS []string + }{ + { + args: nodeLogQuery{}, + wantLinux: []string{"--utc", "--no-pager", "--output=short-precise"}, + wantWindows: []string{"-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", "Get-WinEvent -FilterHashtable @{LogName='Application'} | Sort-Object TimeCreated | Format-Table -AutoSize -Wrap"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, got, err := getLoggingCmd(&tt.args, []string{}) + switch os := runtime.GOOS; os { + case "linux": + if !reflect.DeepEqual(got, tt.wantLinux) { + t.Errorf("getLoggingCmd() = %v, want %v", got, tt.wantLinux) + } + case "windows": + if !reflect.DeepEqual(got, tt.wantWindows) { + t.Errorf("getLoggingCmd() = %v, want %v", got, tt.wantWindows) + } + default: + if err == nil { + t.Errorf("getLoggingCmd() = %v, want err", got) + } + } + }) + } +} + +func Test_newNodeLogQuery(t *testing.T) { + validTimeValue := "2019-12-04T02:00:00Z" + validT, _ := time.Parse(time.RFC3339, validTimeValue) + tests := []struct { + name string + query url.Values + want *nodeLogQuery + wantErr bool + }{ + {name: "empty", query: url.Values{}, want: nil}, + {query: url.Values{"unknown": []string{"true"}}, want: nil}, + + {query: url.Values{"sinceTime": []string{""}}, want: nil}, + {query: url.Values{"sinceTime": []string{"2019-12-04 02:00:00"}}, wantErr: true}, + {query: url.Values{"sinceTime": []string{"2019-12-04 02:00:00.000"}}, wantErr: true}, + {query: url.Values{"sinceTime": []string{"2019-12-04 02"}}, wantErr: true}, + {query: url.Values{"sinceTime": []string{"2019-12-04 02:00"}}, wantErr: true}, + {query: url.Values{"sinceTime": []string{validTimeValue}}, + want: &nodeLogQuery{options: options{SinceTime: &validT}}}, + + {query: url.Values{"untilTime": []string{""}}, want: nil}, + {query: url.Values{"untilTime": []string{"2019-12-04 02:00:00"}}, wantErr: true}, + {query: url.Values{"untilTime": []string{"2019-12-04 02:00:00.000"}}, wantErr: true}, + {query: url.Values{"untilTime": []string{"2019-12-04 02"}}, wantErr: true}, + {query: url.Values{"untilTime": []string{"2019-12-04 02:00"}}, wantErr: true}, + {query: url.Values{"untilTime": []string{validTimeValue}}, + want: &nodeLogQuery{options: options{UntilTime: &validT}}}, + + {query: url.Values{"tailLines": []string{"100"}}, want: &nodeLogQuery{options: options{TailLines: intPtr(100)}}}, + {query: url.Values{"tailLines": []string{"foo"}}, wantErr: true}, + {query: url.Values{"tailLines": []string{" "}}, wantErr: true}, + + {query: url.Values{"pattern": []string{"foo"}}, want: &nodeLogQuery{options: options{Pattern: "foo"}}}, + + {query: url.Values{"boot": []string{""}}, want: nil}, + {query: url.Values{"boot": []string{"0"}}, want: &nodeLogQuery{options: options{Boot: intPtr(0)}}}, + {query: url.Values{"boot": []string{"-23"}}, want: &nodeLogQuery{options: options{Boot: intPtr(-23)}}}, + {query: url.Values{"boot": []string{"foo"}}, wantErr: true}, + {query: url.Values{"boot": []string{" "}}, wantErr: true}, + + {query: url.Values{"query": []string{""}}, wantErr: true}, + {query: url.Values{"query": []string{" ", " "}}, wantErr: true}, + {query: url.Values{"query": []string{"foo"}}, want: &nodeLogQuery{Services: []string{"foo"}}}, + {query: url.Values{"query": []string{"foo", "bar"}}, want: &nodeLogQuery{Services: []string{"foo", "bar"}}}, + {query: url.Values{"query": []string{"foo", "/bar"}}, want: &nodeLogQuery{Services: []string{"foo"}, + Files: []string{"/bar"}}}, + {query: url.Values{"query": []string{"/foo", `\bar`}}, want: &nodeLogQuery{Files: []string{"/foo", `\bar`}}}, + } + for _, tt := range tests { + t.Run(tt.query.Encode(), func(t *testing.T) { + got, err := newNodeLogQuery(tt.query) + if len(err) > 0 != tt.wantErr { + t.Errorf("newNodeLogQuery() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("different: %s", cmp.Diff(tt.want, got, cmpopts.IgnoreUnexported(nodeLogQuery{}))) + } + }) + } +} + +func Test_validateServices(t *testing.T) { + var ( + service1 = "svc1" + service2 = "svc2" + invalid1 = "svc\n" + invalid2 = "svc.foo\n" + ) + tests := []struct { + name string + services []string + wantErr bool + }{ + {name: "one service", services: []string{service1}}, + {name: "two services", services: []string{service1, service2}}, + {name: "invalid service new line", services: []string{invalid1}, wantErr: true}, + {name: "invalid service with dot", services: []string{invalid2}, wantErr: true}, + {name: "long service", services: []string{strings.Repeat(service1, 100)}, wantErr: true}, + {name: "max number of services", services: []string{service1, service2, service1, service2, service1}, wantErr: true}, + } + for _, tt := range tests { + errs := validateServices(tt.services) + t.Run(tt.name, func(t *testing.T) { + if len(errs) > 0 != tt.wantErr { + t.Errorf("validateServices() error = %v, wantErr %v", errs, tt.wantErr) + return + } + }) + } +} + +func Test_nodeLogQuery_validate(t *testing.T) { + var ( + service1 = "svc1" + service2 = "svc2" + file1 = "/test1.log" + file2 = "/test2.log" + pattern = "foo" + invalid = "foo\\" + ) + since, err := time.Parse(time.RFC3339, "2023-01-04T02:00:00Z") + assert.NoError(t, err) + until, err := time.Parse(time.RFC3339, "2023-02-04T02:00:00Z") + assert.NoError(t, err) + + tests := []struct { + name string + Services []string + Files []string + options options + wantErr bool + }{ + {name: "empty", wantErr: true}, + {name: "empty with options", options: options{SinceTime: &since}, wantErr: true}, + {name: "one service", Services: []string{service1}}, + {name: "two services", Services: []string{service1, service2}}, + {name: "one service one file", Services: []string{service1}, Files: []string{file1}, wantErr: true}, + {name: "two files", Files: []string{file1, file2}, wantErr: true}, + {name: "one file options", Files: []string{file1}, options: options{Pattern: pattern}, wantErr: true}, + {name: "invalid pattern", Services: []string{service1}, options: options{Pattern: invalid}, wantErr: true}, + {name: "since", Services: []string{service1}, options: options{SinceTime: &since}}, + {name: "until", Services: []string{service1}, options: options{UntilTime: &until}}, + {name: "since until", Services: []string{service1}, options: options{SinceTime: &until, UntilTime: &since}, + wantErr: true}, + {name: "boot", Services: []string{service1}, options: options{Boot: intPtr(-1)}}, + {name: "boot out of range", Services: []string{service1}, options: options{Boot: intPtr(1)}, wantErr: true}, + {name: "tailLines", Services: []string{service1}, options: options{TailLines: intPtr(100)}}, + {name: "tailLines out of range", Services: []string{service1}, options: options{TailLines: intPtr(100000)}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + n := &nodeLogQuery{ + Services: tt.Services, + Files: tt.Files, + options: tt.options, + } + errs := n.validate() + if len(errs) > 0 != tt.wantErr { + t.Errorf("nodeLogQuery.validate() error = %v, wantErr %v", errs, tt.wantErr) + return + } + }) + } +} + +func intPtr(i int) *int { + return &i +} diff --git a/pkg/kubelet/kubelet_server_journal_windows.go b/pkg/kubelet/kubelet_server_journal_windows.go new file mode 100644 index 00000000000..a805cfc5453 --- /dev/null +++ b/pkg/kubelet/kubelet_server_journal_windows.go @@ -0,0 +1,86 @@ +//go:build windows + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubelet + +import ( + "context" + "fmt" + "os/exec" + "strings" +) + +const powershellExe = "PowerShell.exe" + +// getLoggingCmd returns the powershell cmd and arguments for the given nodeLogQuery and boot +func getLoggingCmd(n *nodeLogQuery, services []string) (string, []string, error) { + args := []string{ + "-NonInteractive", + "-ExecutionPolicy", "Bypass", + "-Command", + } + + psCmd := "Get-WinEvent -FilterHashtable @{LogName='Application'" + if n.SinceTime != nil { + psCmd += fmt.Sprintf("; StartTime='%s'", n.SinceTime.Format(dateLayout)) + } + if n.UntilTime != nil { + psCmd += fmt.Sprintf("; EndTime='%s'", n.UntilTime.Format(dateLayout)) + } + var providers []string + for _, service := range services { + if len(service) > 0 { + providers = append(providers, "'"+service+"'") + } + } + if len(providers) > 0 { + psCmd += fmt.Sprintf("; ProviderName=%s", strings.Join(providers, ",")) + } + psCmd += "}" + if n.TailLines != nil { + psCmd += fmt.Sprintf(" -MaxEvents %d", *n.TailLines) + } + psCmd += " | Sort-Object TimeCreated" + if len(n.Pattern) > 0 { + psCmd += fmt.Sprintf(" | Where-Object -Property Message -Match '%s'", n.Pattern) + } + psCmd += " | Format-Table -AutoSize -Wrap" + + args = append(args, psCmd) + + return powershellExe, args, nil +} + +// checkForNativeLogger always returns true for Windows +func checkForNativeLogger(ctx context.Context, service string) bool { + cmd := exec.CommandContext(ctx, powershellExe, []string{ + "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", + fmt.Sprintf("Get-WinEvent -ListProvider %s | Format-Table -AutoSize", service)}...) + + _, err := cmd.CombinedOutput() + if err != nil { + // Get-WinEvent will return ExitError if the service is not listed as a provider + if _, ok := err.(*exec.ExitError); ok { + return false + } + // Other errors imply that CombinedOutput failed before the command was executed, + // so lets to get the logs using Get-WinEvent at the call site instead of assuming + // the service is logging to a file + } + return true +} diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go index 74c5dba9951..c5a8541ce7a 100644 --- a/pkg/kubelet/server/server.go +++ b/pkg/kubelet/server/server.go @@ -291,7 +291,7 @@ func NewServer( server.InstallDebuggingHandlers() // To maintain backward compatibility serve logs and pprof only when enableDebuggingHandlers is also enabled // see https://github.com/kubernetes/kubernetes/pull/87273 - server.InstallSystemLogHandler(kubeCfg.EnableSystemLogHandler) + server.InstallSystemLogHandler(kubeCfg.EnableSystemLogHandler, kubeCfg.EnableSystemLogQuery) server.InstallProfilingHandler(kubeCfg.EnableProfilingHandler, kubeCfg.EnableContentionProfiling) server.InstallDebugFlagsHandler(kubeCfg.EnableDebugFlagsHandler) } else { @@ -573,7 +573,7 @@ func (s *Server) InstallDebuggingDisabledHandlers() { } // InstallSystemLogHandler registers the HTTP request patterns for logs endpoint. -func (s *Server) InstallSystemLogHandler(enableSystemLogHandler bool) { +func (s *Server) InstallSystemLogHandler(enableSystemLogHandler bool, enableSystemLogQuery bool) { s.addMetricsBucketMatcher("logs") if enableSystemLogHandler { ws := new(restful.WebService) @@ -581,10 +581,23 @@ func (s *Server) InstallSystemLogHandler(enableSystemLogHandler bool) { ws.Route(ws.GET(""). To(s.getLogs). Operation("getLogs")) - ws.Route(ws.GET("/{logpath:*}"). - To(s.getLogs). - Operation("getLogs"). - Param(ws.PathParameter("logpath", "path to the log").DataType("string"))) + if !enableSystemLogQuery { + ws.Route(ws.GET("/{logpath:*}"). + To(s.getLogs). + Operation("getLogs"). + Param(ws.PathParameter("logpath", "path to the log").DataType("string"))) + } else { + ws.Route(ws.GET("/{logpath:*}"). + To(s.getLogs). + Operation("getLogs"). + Param(ws.PathParameter("logpath", "path to the log").DataType("string")). + Param(ws.QueryParameter("query", "query specifies services(s) or files from which to return logs").DataType("string")). + Param(ws.QueryParameter("sinceTime", "sinceTime is an RFC3339 timestamp from which to show logs").DataType("string")). + Param(ws.QueryParameter("untilTime", "untilTime is an RFC3339 timestamp until which to show logs").DataType("string")). + Param(ws.QueryParameter("tailLines", "tailLines is used to retrieve the specified number of lines from the end of the log").DataType("string")). + Param(ws.QueryParameter("pattern", "pattern filters log entries by the provided regex pattern").DataType("string")). + Param(ws.QueryParameter("boot", "boot show messages from a specific system boot").DataType("string"))) + } s.restfulCont.Add(ws) } else { s.restfulCont.Handle(logsPath, getHandlerForDisabledEndpoint("logs endpoint is disabled."))