Merge pull request #105451 from claudiubelu/tests/log-pod-logs

tests: Fetch the pod logs in failed cases
Kubernetes Prow Robot 2021-11-18 13:33:36 -08:00 committed by GitHub
commit 51b94de68f
2 changed files with 70 additions and 6 deletions


@@ -19,6 +19,8 @@ package pod
import (
	"context"
	"fmt"
+	"os"
+	"path/filepath"
	"strconv"
	"strings"
	"time"
@@ -43,6 +45,11 @@ import (
// the pod has already reached completed state.
var errPodCompleted = fmt.Errorf("pod ran to completion")

+// LabelLogOnPodFailure can be used to mark which Pods will have their logs collected in the case of
+// a test failure. By default, if there are no Pods with this label, only the first 5 Pods will
+// have their logs fetched.
+const LabelLogOnPodFailure = "log-on-pod-failure"
+
// TODO: Move to its own subpkg.
// expectNoError checks if "err" is set, and if so, fails assertion while logging the error.
func expectNoError(err error, explain ...interface{}) {
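For context, a test opts a Pod into failure-time log collection simply by attaching the new label; only the key's presence is checked, so any value works. A minimal sketch, assuming the usual v1/metav1 imports (the pod name, image, and namespace here are hypothetical, not part of this diff):

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "connectivity-probe", // hypothetical test pod
			// Opt in to log collection on test failure; the label value is ignored.
			Labels: map[string]string{LabelLogOnPodFailure: "true"},
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{Name: "probe", Image: "busybox", Command: []string{"sleep", "3600"}},
			},
		},
	}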
@@ -405,14 +412,68 @@ func logPodTerminationMessages(pods []v1.Pod) {
	}
}

+// logPodLogs logs the container logs from pods in the given namespace. This can be helpful for
+// debugging issues that do not cause the container to fail (e.g. network connectivity issues).
+// Pods carrying the LabelLogOnPodFailure label are logged; if there are none, only the first
+// 5 Pods are logged. This requires reportDir to be set; each container's logs are written to:
+// {report_dir}/{namespace}/{pod}/{container_name}/logs.txt
+func logPodLogs(c clientset.Interface, namespace string, pods []v1.Pod, reportDir string) {
+	if reportDir == "" {
+		return
+	}
+
+	var logPods []v1.Pod
+	for _, pod := range pods {
+		if _, ok := pod.Labels[LabelLogOnPodFailure]; ok {
+			logPods = append(logPods, pod)
+		}
+	}
+
+	maxPods := len(logPods)
+
+	// If there are no pods with the LabelLogOnPodFailure label, default to the first 5 Pods.
+	if maxPods == 0 {
+		logPods = pods
+		maxPods = len(pods)
+		if maxPods > 5 {
+			maxPods = 5
+		}
+	}
+
+	// Fetch only a short tail of each log to keep the report artifacts small.
+	tailLen := 42
+	for i := 0; i < maxPods; i++ {
+		pod := logPods[i]
+		for _, container := range pod.Spec.Containers {
+			logs, err := getPodLogsInternal(c, namespace, pod.Name, container.Name, false, nil, &tailLen)
+			if err != nil {
+				e2elog.Logf("Unable to fetch %s/%s/%s logs: %v", pod.Namespace, pod.Name, container.Name, err)
+				continue
+			}
+
+			logDir := filepath.Join(reportDir, namespace, pod.Name, container.Name)
+			err = os.MkdirAll(logDir, 0755)
+			if err != nil {
+				e2elog.Logf("Unable to create path '%s'. Err: %v", logDir, err)
+				continue
+			}
+
+			logPath := filepath.Join(logDir, "logs.txt")
+			err = os.WriteFile(logPath, []byte(logs), 0644)
+			if err != nil {
+				e2elog.Logf("Could not write the container logs in: %s. Err: %v", logPath, err)
+			}
+		}
+	}
+}
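As a concrete illustration of the layout documented above: with a hypothetical reportDir of _artifacts, the last 42 lines of container probe in pod connectivity-probe of namespace e2e-tests-net would be written to:

	_artifacts/e2e-tests-net/connectivity-probe/probe/logs.txt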
// DumpAllPodInfoForNamespace logs all pod information for a given namespace.
-func DumpAllPodInfoForNamespace(c clientset.Interface, namespace string) {
+func DumpAllPodInfoForNamespace(c clientset.Interface, namespace, reportDir string) {
	pods, err := c.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		e2elog.Logf("unable to fetch pod debug info: %v", err)
	}
	LogPodStates(pods.Items)
	logPodTerminationMessages(pods.Items)
+	logPodLogs(c, namespace, pods.Items, reportDir)
}
// FilterNonRestartablePods filters out pods that will never get recreated if
@@ -561,23 +622,23 @@ func checkPodsCondition(c clientset.Interface, ns string, podNames []string, tim
// GetPodLogs returns the logs of the specified container (namespace/pod/container).
func GetPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
-	return getPodLogsInternal(c, namespace, podName, containerName, false, nil)
+	return getPodLogsInternal(c, namespace, podName, containerName, false, nil, nil)
}

// GetPodLogsSince returns the logs of the specified container (namespace/pod/container) since a timestamp.
func GetPodLogsSince(c clientset.Interface, namespace, podName, containerName string, since time.Time) (string, error) {
	sinceTime := metav1.NewTime(since)
-	return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime)
+	return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime, nil)
}

// GetPreviousPodLogs returns the logs of the previous instance of the
// specified container (namespace/pod/container).
func GetPreviousPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
-	return getPodLogsInternal(c, namespace, podName, containerName, true, nil)
+	return getPodLogsInternal(c, namespace, podName, containerName, true, nil, nil)
}
// utility function for gomega Eventually
-func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time) (string, error) {
+func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time, tailLines *int) (string, error) {
	request := c.CoreV1().RESTClient().Get().
		Resource("pods").
		Namespace(namespace).
@@ -587,6 +648,9 @@ func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName
	if sinceTime != nil {
		request.Param("sinceTime", sinceTime.Format(time.RFC3339))
	}
+	if tailLines != nil {
+		request.Param("tailLines", strconv.Itoa(*tailLines))
+	}
	logs, err := request.Do(context.TODO()).Raw()
	if err != nil {
		return "", err


@@ -899,7 +899,7 @@ func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
		return c.CoreV1().Events(ns).List(context.TODO(), opts)
	}, namespace)

-	e2epod.DumpAllPodInfoForNamespace(c, namespace)
+	e2epod.DumpAllPodInfoForNamespace(c, namespace, TestContext.ReportDir)

	// If cluster is large, then the following logs are basically useless, because:
	// 1. it takes tens of minutes or hours to grab all of them