Merge pull request #105451 from claudiubelu/tests/log-pod-logs

tests: Fetch the pod logs in failed cases
Kubernetes Prow Robot 2021-11-18 13:33:36 -08:00 committed by GitHub
commit 51b94de68f
2 changed files with 70 additions and 6 deletions


@@ -19,6 +19,8 @@ package pod
import (
	"context"
	"fmt"
+	"os"
+	"path/filepath"
	"strconv"
	"strings"
	"time"
@@ -43,6 +45,11 @@ import (
// the pod has already reached completed state.
var errPodCompleted = fmt.Errorf("pod ran to completion")

+// LabelLogOnPodFailure can be used to mark which Pods will have their logs collected in the case of
+// a test failure. By default, if there are no Pods with this label, only the first 5 Pods will
+// have their logs fetched.
+const LabelLogOnPodFailure = "log-on-pod-failure"
+
// TODO: Move to its own subpkg.
// expectNoError checks if "err" is set, and if so, fails assertion while logging the error.
func expectNoError(err error, explain ...interface{}) {
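For context, a test opts a Pod into failure-time log collection simply by attaching the new label; only the key's presence is checked, so any value works. A minimal sketch, assuming the usual v1/metav1 imports (the pod name, image, and namespace here are hypothetical, not part of this diff):

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "connectivity-probe", // hypothetical test pod
			// Opt in to log collection on test failure; the label value is ignored.
			Labels: map[string]string{LabelLogOnPodFailure: "true"},
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{Name: "probe", Image: "busybox", Command: []string{"sleep", "3600"}},
			},
		},
	}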
@@ -405,14 +412,68 @@ func logPodTerminationMessages(pods []v1.Pod) {
	}
}

+// logPodLogs logs the container logs from pods in the given namespace. This can be helpful for
+// debugging issues that do not cause the container to fail (e.g. network connectivity issues).
+// Pods carrying the LabelLogOnPodFailure label are logged; if there are none, only the first
+// 5 Pods are logged. This requires reportDir to be set; each container's logs are written to:
+// {report_dir}/{namespace}/{pod}/{container_name}/logs.txt
+func logPodLogs(c clientset.Interface, namespace string, pods []v1.Pod, reportDir string) {
+	if reportDir == "" {
+		return
+	}
+
+	var logPods []v1.Pod
+	for _, pod := range pods {
+		if _, ok := pod.Labels[LabelLogOnPodFailure]; ok {
+			logPods = append(logPods, pod)
+		}
+	}
+
+	maxPods := len(logPods)
+
+	// If there are no pods with the LabelLogOnPodFailure label, default to the first 5 Pods.
+	if maxPods == 0 {
+		logPods = pods
+		maxPods = len(pods)
+		if maxPods > 5 {
+			maxPods = 5
+		}
+	}
+
+	// Fetch only a short tail of each log to keep the report artifacts small.
+	tailLen := 42
+	for i := 0; i < maxPods; i++ {
+		pod := logPods[i]
+		for _, container := range pod.Spec.Containers {
+			logs, err := getPodLogsInternal(c, namespace, pod.Name, container.Name, false, nil, &tailLen)
+			if err != nil {
+				e2elog.Logf("Unable to fetch %s/%s/%s logs: %v", pod.Namespace, pod.Name, container.Name, err)
+				continue
+			}
+
+			logDir := filepath.Join(reportDir, namespace, pod.Name, container.Name)
+			err = os.MkdirAll(logDir, 0755)
+			if err != nil {
+				e2elog.Logf("Unable to create path '%s'. Err: %v", logDir, err)
+				continue
+			}
+
+			logPath := filepath.Join(logDir, "logs.txt")
+			err = os.WriteFile(logPath, []byte(logs), 0644)
+			if err != nil {
+				e2elog.Logf("Could not write the container logs in: %s. Err: %v", logPath, err)
+			}
+		}
+	}
+}
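As a concrete illustration of the layout documented above: with a hypothetical reportDir of _artifacts, the last 42 lines of container probe in pod connectivity-probe of namespace e2e-tests-net would be written to:

	_artifacts/e2e-tests-net/connectivity-probe/probe/logs.txt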
// DumpAllPodInfoForNamespace logs all pod information for a given namespace.
-func DumpAllPodInfoForNamespace(c clientset.Interface, namespace string) {
+func DumpAllPodInfoForNamespace(c clientset.Interface, namespace, reportDir string) {
	pods, err := c.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		e2elog.Logf("unable to fetch pod debug info: %v", err)
	}
	LogPodStates(pods.Items)
	logPodTerminationMessages(pods.Items)
+	logPodLogs(c, namespace, pods.Items, reportDir)
}
// FilterNonRestartablePods filters out pods that will never get recreated if
@@ -561,23 +622,23 @@ func checkPodsCondition(c clientset.Interface, ns string, podNames []string, tim
// GetPodLogs returns the logs of the specified container (namespace/pod/container).
func GetPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
-	return getPodLogsInternal(c, namespace, podName, containerName, false, nil)
+	return getPodLogsInternal(c, namespace, podName, containerName, false, nil, nil)
}

// GetPodLogsSince returns the logs of the specified container (namespace/pod/container) since a timestamp.
func GetPodLogsSince(c clientset.Interface, namespace, podName, containerName string, since time.Time) (string, error) {
	sinceTime := metav1.NewTime(since)
-	return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime)
+	return getPodLogsInternal(c, namespace, podName, containerName, false, &sinceTime, nil)
}

// GetPreviousPodLogs returns the logs of the previous instance of the
// specified container (namespace/pod/container).
func GetPreviousPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
-	return getPodLogsInternal(c, namespace, podName, containerName, true, nil)
+	return getPodLogsInternal(c, namespace, podName, containerName, true, nil, nil)
}
// utility function for gomega Eventually
-func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time) (string, error) {
+func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool, sinceTime *metav1.Time, tailLines *int) (string, error) {
	request := c.CoreV1().RESTClient().Get().
		Resource("pods").
		Namespace(namespace).
@@ -587,6 +648,9 @@ func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName
	if sinceTime != nil {
		request.Param("sinceTime", sinceTime.Format(time.RFC3339))
	}
+	if tailLines != nil {
+		request.Param("tailLines", strconv.Itoa(*tailLines))
+	}
	logs, err := request.Do(context.TODO()).Raw()
	if err != nil {
		return "", err


@@ -899,7 +899,7 @@ func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
		return c.CoreV1().Events(ns).List(context.TODO(), opts)
	}, namespace)

-	e2epod.DumpAllPodInfoForNamespace(c, namespace)
+	e2epod.DumpAllPodInfoForNamespace(c, namespace, TestContext.ReportDir)

	// If cluster is large, then the following logs are basically useless, because:
	// 1. it takes tens of minutes or hours to grab all of them