e2e framework: move dumping of information into sub package

This reduces the size of the test/e2e/framework itself. Because the framework
no longer dumps anything by default, E2E test suites must either set their own
dump function or restore the original behavior by importing
"k8s.io/kubernetes/test/e2e/framework/debug/init".
Patrick Ohly 2022-08-25 13:43:04 +02:00
parent 2c8ef492ae
commit c45a924c5e
9 changed files with 247 additions and 164 deletions
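
As an illustration (not part of the commit): an out-of-tree E2E suite that wants the previous dump-on-failure behavior back only needs the side-effect import. A minimal sketch; the package and test names are hypothetical:

package e2esuite

import (
    "testing"

    // Side-effect import: registers debug.DumpAllNamespaceInfo with the
    // framework so namespace information is dumped after a test failure.
    _ "k8s.io/kubernetes/test/e2e/framework/debug/init"
)

// TestE2E stands in for the suite's usual Ginkgo entry point.
func TestE2E(t *testing.T) {}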

View File

@@ -62,6 +62,9 @@ import (
_ "k8s.io/kubernetes/test/e2e/storage"
_ "k8s.io/kubernetes/test/e2e/storage/external"
_ "k8s.io/kubernetes/test/e2e/windows"
// reconfigure framework
_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
)
// handleFlags sets up all flags and parses the command line.

View File

@@ -40,6 +40,7 @@ import (
clientset "k8s.io/client-go/kubernetes"
scaleclient "k8s.io/client-go/scale"
"k8s.io/kubernetes/test/e2e/framework"
e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
e2erc "k8s.io/kubernetes/test/e2e/framework/rc"
e2eresource "k8s.io/kubernetes/test/e2e/framework/resource"
@@ -628,7 +629,7 @@ func runServiceAndWorkloadForResourceConsumer(c clientset.Interface, resourceCli
dpConfig := testutils.DeploymentConfig{
RCConfig: rcConfig,
}
- dpConfig.NodeDumpFunc = framework.DumpNodeDebugInfo
+ dpConfig.NodeDumpFunc = e2edebug.DumpNodeDebugInfo
dpConfig.ContainerDumpFunc = e2ekubectl.LogFailedContainers
switch kind {
@@ -736,7 +737,7 @@ func DeleteHorizontalPodAutoscaler(rc *ResourceConsumer, autoscalerName string)
// runReplicaSet launches (and verifies the correctness of) a replicaset.
func runReplicaSet(config testutils.ReplicaSetConfig) error {
ginkgo.By(fmt.Sprintf("creating replicaset %s in namespace %s", config.Name, config.Namespace))
- config.NodeDumpFunc = framework.DumpNodeDebugInfo
+ config.NodeDumpFunc = e2edebug.DumpNodeDebugInfo
config.ContainerDumpFunc = e2ekubectl.LogFailedContainers
return testutils.RunReplicaSet(config)
}

View File

@@ -0,0 +1,187 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package debug
import (
"context"
"fmt"
"sort"
"time"
"github.com/onsi/ginkgo/v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/kubernetes/test/e2e/framework"
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)
// EventsLister is a func that lists events.
type EventsLister func(opts metav1.ListOptions, ns string) (*v1.EventList, error)
// dumpEventsInNamespace dumps events in the given namespace.
func dumpEventsInNamespace(eventsLister EventsLister, namespace string) {
ginkgo.By(fmt.Sprintf("Collecting events from namespace %q.", namespace))
events, err := eventsLister(metav1.ListOptions{}, namespace)
framework.ExpectNoError(err, "failed to list events in namespace %q", namespace)
ginkgo.By(fmt.Sprintf("Found %d events.", len(events.Items)))
// Sort events by their first timestamp
sortedEvents := events.Items
if len(sortedEvents) > 1 {
sort.Sort(byFirstTimestamp(sortedEvents))
}
for _, e := range sortedEvents {
framework.Logf("At %v - event for %v: %v %v: %v", e.FirstTimestamp, e.InvolvedObject.Name, e.Source, e.Reason, e.Message)
}
// Note that we don't wait for any Cleanup to propagate, which means
// that if you delete a bunch of pods right before ending your test,
// you may or may not see the killing/deletion/Cleanup events.
}
// DumpAllNamespaceInfo dumps events, pods, and node information for the given namespace.
func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
dumpEventsInNamespace(func(opts metav1.ListOptions, ns string) (*v1.EventList, error) {
return c.CoreV1().Events(ns).List(context.TODO(), opts)
}, namespace)
e2epod.DumpAllPodInfoForNamespace(c, namespace, framework.TestContext.ReportDir)
// If the cluster is large, then the following logs are basically useless, because:
// 1. it takes tens of minutes or hours to grab all of them
// 2. there are so many of them that working with them is mostly impossible
// So we dump them only if the cluster is relatively small.
maxNodesForDump := framework.TestContext.MaxNodesToGather
nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
if err != nil {
framework.Logf("unable to fetch node list: %v", err)
return
}
if len(nodes.Items) <= maxNodesForDump {
dumpAllNodeInfo(c, nodes)
} else {
framework.Logf("skipping dumping cluster info - cluster too large")
}
}
// byFirstTimestamp sorts a slice of events by first timestamp, using their involvedObject's name as a tie breaker.
type byFirstTimestamp []v1.Event
func (o byFirstTimestamp) Len() int { return len(o) }
func (o byFirstTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o byFirstTimestamp) Less(i, j int) bool {
if o[i].FirstTimestamp.Equal(&o[j].FirstTimestamp) {
return o[i].InvolvedObject.Name < o[j].InvolvedObject.Name
}
return o[i].FirstTimestamp.Before(&o[j].FirstTimestamp)
}
func dumpAllNodeInfo(c clientset.Interface, nodes *v1.NodeList) {
names := make([]string, len(nodes.Items))
for ix := range nodes.Items {
names[ix] = nodes.Items[ix].Name
}
DumpNodeDebugInfo(c, names, framework.Logf)
}
// DumpNodeDebugInfo dumps debug information of the given nodes.
func DumpNodeDebugInfo(c clientset.Interface, nodeNames []string, logFunc func(fmt string, args ...interface{})) {
for _, n := range nodeNames {
logFunc("\nLogging node info for node %v", n)
node, err := c.CoreV1().Nodes().Get(context.TODO(), n, metav1.GetOptions{})
if err != nil {
logFunc("Error getting node info %v", err)
}
logFunc("Node Info: %v", node)
logFunc("\nLogging kubelet events for node %v", n)
for _, e := range getNodeEvents(c, n) {
logFunc("source %v type %v message %v reason %v first ts %v last ts %v, involved obj %+v",
e.Source, e.Type, e.Message, e.Reason, e.FirstTimestamp, e.LastTimestamp, e.InvolvedObject)
}
logFunc("\nLogging pods the kubelet thinks is on node %v", n)
podList, err := getKubeletPods(c, n)
if err != nil {
logFunc("Unable to retrieve kubelet pods for node %v: %v", n, err)
continue
}
for _, p := range podList.Items {
logFunc("%v started at %v (%d+%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.InitContainerStatuses), len(p.Status.ContainerStatuses))
for _, c := range p.Status.InitContainerStatuses {
logFunc("\tInit container %v ready: %v, restart count %v",
c.Name, c.Ready, c.RestartCount)
}
for _, c := range p.Status.ContainerStatuses {
logFunc("\tContainer %v ready: %v, restart count %v",
c.Name, c.Ready, c.RestartCount)
}
}
e2emetrics.HighLatencyKubeletOperations(c, 10*time.Second, n, logFunc)
// TODO: Log node resource info
}
}
// getKubeletPods retrieves the list of pods on the kubelet.
func getKubeletPods(c clientset.Interface, node string) (*v1.PodList, error) {
var client restclient.Result
finished := make(chan struct{}, 1)
go func() {
// call chain tends to hang in some cases when Node is not ready. Add an artificial timeout for this call. #22165
client = c.CoreV1().RESTClient().Get().
Resource("nodes").
SubResource("proxy").
Name(fmt.Sprintf("%v:%v", node, framework.KubeletPort)).
Suffix("pods").
Do(context.TODO())
finished <- struct{}{}
}()
select {
case <-finished:
result := &v1.PodList{}
if err := client.Into(result); err != nil {
return &v1.PodList{}, err
}
return result, nil
case <-time.After(framework.PodGetTimeout):
return &v1.PodList{}, fmt.Errorf("Waiting up to %v for getting the list of pods", framework.PodGetTimeout)
}
}
// getNodeEvents returns kubelet events for the given node. This includes kubelet
// restart and node unhealthy events. Note that listing events like this will mess
// with latency metrics, so beware of calling it during a test.
func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event {
selector := fields.Set{
"involvedObject.kind": "Node",
"involvedObject.name": nodeName,
"involvedObject.namespace": metav1.NamespaceAll,
"source": "kubelet",
}.AsSelector().String()
options := metav1.ListOptions{FieldSelector: selector}
events, err := c.CoreV1().Events(metav1.NamespaceSystem).List(context.TODO(), options)
if err != nil {
framework.Logf("Unexpected error retrieving node events %v", err)
return []v1.Event{}
}
return events.Items
}
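
The helpers keep their old signatures and are simply reached through the new e2edebug alias, as the callers above show. A minimal sketch of direct use, assuming c is a clientset.Interface; the namespace and node names are placeholders:

e2edebug.DumpAllNamespaceInfo(c, "my-test-namespace")
e2edebug.DumpNodeDebugInfo(c, []string{"node-1"}, framework.Logf)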

View File

@@ -0,0 +1,33 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package init registers debug.DumpAllNamespaceInfo as the framework's implementation for dumping namespace information.
package init
import (
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/debug"
)
func init() {
framework.NewFrameworkExtensions = append(framework.NewFrameworkExtensions,
func(f *framework.Framework) {
f.DumpAllNamespaceInfo = func(f *framework.Framework, ns string) {
debug.DumpAllNamespaceInfo(f.ClientSet, ns)
}
},
)
}
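
A suite that prefers different failure output can append its own extension instead of importing this package. A sketch under that assumption, with a purely illustrative log-only hook:

framework.NewFrameworkExtensions = append(framework.NewFrameworkExtensions,
    func(f *framework.Framework) {
        f.DumpAllNamespaceInfo = func(f *framework.Framework, ns string) {
            // Hypothetical lightweight alternative to the full dump.
            framework.Logf("spec failed, leaving namespace %s for manual inspection", ns)
        }
    },
)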

View File

@@ -128,8 +128,16 @@ type Framework struct {
// Timeouts contains the custom timeouts used during the test execution.
Timeouts *TimeoutContext
// DumpAllNamespaceInfo is invoked by the framework to record
// information about a namespace after a test failure.
DumpAllNamespaceInfo DumpAllNamespaceInfoAction
}
// DumpAllNamespaceInfoAction is called after each failed test for namespaces
// created for the test.
type DumpAllNamespaceInfoAction func(f *Framework, namespace string)
// TestDataSummary is an interface for managing test data.
type TestDataSummary interface {
SummaryKind() string
@@ -325,7 +333,7 @@ func (f *Framework) dumpNamespaceInfo() {
ginkgo.By("dump namespace information after failure", func() {
if !f.SkipNamespaceCreation {
for _, ns := range f.namespacesToDelete {
- DumpAllNamespaceInfo(f.ClientSet, ns.Name)
+ f.DumpAllNamespaceInfo(f, ns.Name)
}
}
})
@@ -391,8 +399,8 @@ func (f *Framework) AfterEach() {
nsDeletionErrors[ns.Name] = err
// Dump namespace if we are unable to delete the namespace and the dump was not already performed.
- if !ginkgo.CurrentSpecReport().Failed() && TestContext.DumpLogsOnFailure {
- DumpAllNamespaceInfo(f.ClientSet, ns.Name)
+ if !ginkgo.CurrentSpecReport().Failed() && TestContext.DumpLogsOnFailure && f.DumpAllNamespaceInfo != nil {
+ f.DumpAllNamespaceInfo(f, ns.Name)
}
} else {
Logf("Namespace %v was already deleted", ns.Name)
@@ -498,8 +506,8 @@ func (f *Framework) DeleteNamespace(name string) {
}
}()
// if the current test failed then we should dump namespace information
- if !f.SkipNamespaceCreation && ginkgo.CurrentSpecReport().Failed() && TestContext.DumpLogsOnFailure {
- DumpAllNamespaceInfo(f.ClientSet, name)
+ if !f.SkipNamespaceCreation && ginkgo.CurrentSpecReport().Failed() && TestContext.DumpLogsOnFailure && f.DumpAllNamespaceInfo != nil {
+ f.DumpAllNamespaceInfo(f, name)
}
}
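
Because DumpAllNamespaceInfo is an ordinary field, an individual test could also override it on its own instance. A sketch assuming the usual framework.NewDefaultFramework constructor; the log line is illustrative:

f := framework.NewDefaultFramework("dump-demo")
f.DumpAllNamespaceInfo = func(f *framework.Framework, ns string) {
    framework.Logf("custom dump for namespace %s", ns)
}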

View File

@@ -26,10 +26,10 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
clientset "k8s.io/client-go/kubernetes"
scaleclient "k8s.io/client-go/scale"
"k8s.io/kubernetes/test/e2e/framework"
e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
e2eresource "k8s.io/kubernetes/test/e2e/framework/resource"
testutils "k8s.io/kubernetes/test/utils"
e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
)
// ByNameContainer returns a ReplicationController with specified name and container
@@ -83,7 +83,7 @@ func ScaleRC(clientset clientset.Interface, scalesGetter scaleclient.ScalesGette
// and will wait for all pods it spawns to become "Running".
func RunRC(config testutils.RCConfig) error {
ginkgo.By(fmt.Sprintf("creating replication controller %s in namespace %s", config.Name, config.Namespace))
- config.NodeDumpFunc = framework.DumpNodeDebugInfo
+ config.NodeDumpFunc = e2edebug.DumpNodeDebugInfo
config.ContainerDumpFunc = e2ekubectl.LogFailedContainers
return testutils.RunRC(config)
}

View File

@@ -27,7 +27,6 @@ import (
"os"
"os/exec"
"path"
"sort"
"strconv"
"strings"
"sync"
@@ -59,9 +58,7 @@ import (
// TODO: Remove the following imports (ref: https://github.com/kubernetes/kubernetes/issues/81245)
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)
const (
@@ -556,158 +553,6 @@ func TryKill(cmd *exec.Cmd) {
}
}
// EventsLister is a func that lists events.
type EventsLister func(opts metav1.ListOptions, ns string) (*v1.EventList, error)
// dumpEventsInNamespace dumps events in the given namespace.
func dumpEventsInNamespace(eventsLister EventsLister, namespace string) {
ginkgo.By(fmt.Sprintf("Collecting events from namespace %q.", namespace))
events, err := eventsLister(metav1.ListOptions{}, namespace)
ExpectNoError(err, "failed to list events in namespace %q", namespace)
ginkgo.By(fmt.Sprintf("Found %d events.", len(events.Items)))
// Sort events by their first timestamp
sortedEvents := events.Items
if len(sortedEvents) > 1 {
sort.Sort(byFirstTimestamp(sortedEvents))
}
for _, e := range sortedEvents {
Logf("At %v - event for %v: %v %v: %v", e.FirstTimestamp, e.InvolvedObject.Name, e.Source, e.Reason, e.Message)
}
// Note that we don't wait for any Cleanup to propagate, which means
// that if you delete a bunch of pods right before ending your test,
// you may or may not see the killing/deletion/Cleanup events.
}
// DumpAllNamespaceInfo dumps events, pods, and node information for the given namespace.
func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
dumpEventsInNamespace(func(opts metav1.ListOptions, ns string) (*v1.EventList, error) {
return c.CoreV1().Events(ns).List(context.TODO(), opts)
}, namespace)
e2epod.DumpAllPodInfoForNamespace(c, namespace, TestContext.ReportDir)
// If the cluster is large, then the following logs are basically useless, because:
// 1. it takes tens of minutes or hours to grab all of them
// 2. there are so many of them that working with them is mostly impossible
// So we dump them only if the cluster is relatively small.
maxNodesForDump := TestContext.MaxNodesToGather
nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
if err != nil {
Logf("unable to fetch node list: %v", err)
return
}
if len(nodes.Items) <= maxNodesForDump {
dumpAllNodeInfo(c, nodes)
} else {
Logf("skipping dumping cluster info - cluster too large")
}
}
// byFirstTimestamp sorts a slice of events by first timestamp, using their involvedObject's name as a tie breaker.
type byFirstTimestamp []v1.Event
func (o byFirstTimestamp) Len() int { return len(o) }
func (o byFirstTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o byFirstTimestamp) Less(i, j int) bool {
if o[i].FirstTimestamp.Equal(&o[j].FirstTimestamp) {
return o[i].InvolvedObject.Name < o[j].InvolvedObject.Name
}
return o[i].FirstTimestamp.Before(&o[j].FirstTimestamp)
}
func dumpAllNodeInfo(c clientset.Interface, nodes *v1.NodeList) {
names := make([]string, len(nodes.Items))
for ix := range nodes.Items {
names[ix] = nodes.Items[ix].Name
}
DumpNodeDebugInfo(c, names, Logf)
}
// DumpNodeDebugInfo dumps debug information of the given nodes.
func DumpNodeDebugInfo(c clientset.Interface, nodeNames []string, logFunc func(fmt string, args ...interface{})) {
for _, n := range nodeNames {
logFunc("\nLogging node info for node %v", n)
node, err := c.CoreV1().Nodes().Get(context.TODO(), n, metav1.GetOptions{})
if err != nil {
logFunc("Error getting node info %v", err)
}
logFunc("Node Info: %v", node)
logFunc("\nLogging kubelet events for node %v", n)
for _, e := range getNodeEvents(c, n) {
logFunc("source %v type %v message %v reason %v first ts %v last ts %v, involved obj %+v",
e.Source, e.Type, e.Message, e.Reason, e.FirstTimestamp, e.LastTimestamp, e.InvolvedObject)
}
logFunc("\nLogging pods the kubelet thinks is on node %v", n)
podList, err := getKubeletPods(c, n)
if err != nil {
logFunc("Unable to retrieve kubelet pods for node %v: %v", n, err)
continue
}
for _, p := range podList.Items {
logFunc("%v started at %v (%d+%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.InitContainerStatuses), len(p.Status.ContainerStatuses))
for _, c := range p.Status.InitContainerStatuses {
logFunc("\tInit container %v ready: %v, restart count %v",
c.Name, c.Ready, c.RestartCount)
}
for _, c := range p.Status.ContainerStatuses {
logFunc("\tContainer %v ready: %v, restart count %v",
c.Name, c.Ready, c.RestartCount)
}
}
e2emetrics.HighLatencyKubeletOperations(c, 10*time.Second, n, logFunc)
// TODO: Log node resource info
}
}
// getKubeletPods retrieves the list of pods on the kubelet.
func getKubeletPods(c clientset.Interface, node string) (*v1.PodList, error) {
var client restclient.Result
finished := make(chan struct{}, 1)
go func() {
// call chain tends to hang in some cases when Node is not ready. Add an artificial timeout for this call. #22165
client = c.CoreV1().RESTClient().Get().
Resource("nodes").
SubResource("proxy").
Name(fmt.Sprintf("%v:%v", node, KubeletPort)).
Suffix("pods").
Do(context.TODO())
finished <- struct{}{}
}()
select {
case <-finished:
result := &v1.PodList{}
if err := client.Into(result); err != nil {
return &v1.PodList{}, err
}
return result, nil
case <-time.After(PodGetTimeout):
return &v1.PodList{}, fmt.Errorf("Waiting up to %v for getting the list of pods", PodGetTimeout)
}
}
// getNodeEvents returns kubelet events for the given node. This includes kubelet
// restart and node unhealthy events. Note that listing events like this will mess
// with latency metrics, so beware of calling it during a test.
func getNodeEvents(c clientset.Interface, nodeName string) []v1.Event {
selector := fields.Set{
"involvedObject.kind": "Node",
"involvedObject.name": nodeName,
"involvedObject.namespace": metav1.NamespaceAll,
"source": "kubelet",
}.AsSelector().String()
options := metav1.ListOptions{FieldSelector: selector}
events, err := c.CoreV1().Events(metav1.NamespaceSystem).List(context.TODO(), options)
if err != nil {
Logf("Unexpected error retrieving node events %v", err)
return []v1.Event{}
}
return events.Items
}
// WaitForAllNodesSchedulable waits up to timeout for all nodes
// (except up to TestContext.AllowedNotReadyNodes of them) to become schedulable.
func WaitForAllNodesSchedulable(c clientset.Interface, timeout time.Duration) error {

View File

@@ -27,6 +27,9 @@ import (
"k8s.io/kubernetes/test/e2e/framework"
e2econfig "k8s.io/kubernetes/test/e2e/framework/config"
// reconfigure framework
_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
)
func TestMain(m *testing.M) {

View File

@@ -53,6 +53,9 @@ import (
e2enodetestingmanifests "k8s.io/kubernetes/test/e2e_node/testing-manifests"
system "k8s.io/system-validators/validators"
// reconfigure framework
_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/spf13/pflag"