diff --git a/test/e2e/framework/pod/resource.go b/test/e2e/framework/pod/resource.go index 0f29b78c78b..380b8930674 100644 --- a/test/e2e/framework/pod/resource.go +++ b/test/e2e/framework/pod/resource.go @@ -19,6 +19,8 @@ package pod import ( "context" "fmt" + "os" + "path/filepath" "strconv" "strings" "time" @@ -43,6 +45,11 @@ import ( // the pod has already reached completed state. var errPodCompleted = fmt.Errorf("pod ran to completion") +// LabelLogOnPodFailure can be used to mark which Pods will have their logs logged in the case of +// a test failure. By default, if there are no Pods with this label, only the first 5 Pods will +// have their logs fetched. +const LabelLogOnPodFailure = "log-on-pod-failure" + // TODO: Move to its own subpkg. // expectNoError checks if "err" is set, and if so, fails assertion while logging the error. func expectNoError(err error, explain ...interface{}) { @@ -388,14 +395,68 @@ func logPodTerminationMessages(pods []v1.Pod) { } } +// logPodLogs logs the container logs from pods in the given namespace. This can be helpful for debugging +// issues that do not cause the container to fail (e.g.: network connectivity issues) +// We will log the Pods that have the LabelLogOnPodFailure label. If there aren't any, we default to +// logging only the first 5 Pods. This requires the reportDir to be set, and the pods are logged into: +// {report_dir}/pods/{namespace}/{pod}/{container_name}/logs.txt +func logPodLogs(c clientset.Interface, namespace string, pods []v1.Pod, reportDir string) { + if reportDir == "" { + return + } + + var logPods []v1.Pod + for _, pod := range pods { + if _, ok := pod.Labels[LabelLogOnPodFailure]; ok { + logPods = append(logPods, pod) + } + } + maxPods := len(logPods) + + // There are no pods with the LabelLogOnPodFailure label, we default to the first 5 Pods. + if maxPods == 0 { + logPods = pods + maxPods = len(pods) + if maxPods > 5 { + maxPods = 5 + } + } + + tailLen := 42 + for i := 0; i < maxPods; i++ { + pod := logPods[i] + for _, container := range pod.Spec.Containers { + logs, err := getPodLogsInternal(c, namespace, pod.Name, container.Name, false, nil, &tailLen) + if err != nil { + e2elog.Logf("Unable to fetch %s/%s/%s logs: %v", pod.Namespace, pod.Name, container.Name, err) + continue + } + + logDir := filepath.Join(reportDir, namespace, pod.Name, container.Name) + err = os.MkdirAll(logDir, 0755) + if err != nil { + e2elog.Logf("Unable to create path '%s'. Err: %v", logDir, err) + continue + } + + logPath := filepath.Join(logDir, "logs.txt") + err = os.WriteFile(logPath, []byte(logs), 0644) + if err != nil { + e2elog.Logf("Could not write the container logs in: %s. Err: %v", logPath, err) + } + } + } +} + // DumpAllPodInfoForNamespace logs all pod information for a given namespace. -func DumpAllPodInfoForNamespace(c clientset.Interface, namespace string) { +func DumpAllPodInfoForNamespace(c clientset.Interface, namespace, reportDir string) { pods, err := c.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{}) if err != nil { e2elog.Logf("unable to fetch pod debug info: %v", err) } LogPodStates(pods.Items) logPodTerminationMessages(pods.Items) + logPodLogs(c, namespace, pods.Items, reportDir) } // FilterNonRestartablePods filters out pods that will never get recreated if @@ -544,23 +605,23 @@ func checkPodsCondition(c clientset.Interface, ns string, podNames []string, tim // GetPodLogs returns the logs of the specified container (namespace/pod/container). func GetPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) { - return getPodLogsInternal(c, namespace, podName, containerName, false, nil) + return getPodLogsInternal(c, namespace, podName, containerName, false, nil, nil) } // GetPodLogsSince returns the logs of the specified container (namespace/pod/container) since a timestamp. func GetPodLogsSince(c clientset.Interface, namespace, podName, containerName string, since time.Time) (string, error) { sinceTime := metav1.NewTime(since) - return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime) + return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime, nil) } // GetPreviousPodLogs returns the logs of the previous instance of the // specified container (namespace/pod/container). func GetPreviousPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) { - return getPodLogsInternal(c, namespace, podName, containerName, true, nil) + return getPodLogsInternal(c, namespace, podName, containerName, true, nil, nil) } // utility function for gomega Eventually -func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time) (string, error) { +func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time, tailLines *int) (string, error) { request := c.CoreV1().RESTClient().Get(). Resource("pods"). Namespace(namespace). @@ -570,6 +631,9 @@ func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName if sinceTime != nil { request.Param("sinceTime", sinceTime.Format(time.RFC3339)) } + if tailLines != nil { + request.Param("tailLines", strconv.Itoa(*tailLines)) + } logs, err := request.Do(context.TODO()).Raw() if err != nil { return "", err diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index c14b86c9266..3f323fc0ba5 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -899,7 +899,7 @@ func DumpAllNamespaceInfo(c clientset.Interface, namespace string) { return c.CoreV1().Events(ns).List(context.TODO(), opts) }, namespace) - e2epod.DumpAllPodInfoForNamespace(c, namespace) + e2epod.DumpAllPodInfoForNamespace(c, namespace, TestContext.ReportDir) // If cluster is large, then the following logs are basically useless, because: // 1. it takes tens of minutes or hours to grab all of them