Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-20 10:20:51 +00:00
e2e framework: move resource gathering into framework/debug
This helps with getting rid of the ssh dependency. The same init package that already handles dumping namespaces takes care of adding the functionality back to framework instances.
parent f9bc4f837b
commit 70d0824f01
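The diff below touches three pieces: the framework/debug init package gains the resource gathering hooks (including the package-level AddonResourceConstraints variable and a ginkgo.DeferCleanup that summarizes usage), the resource usage gatherer switches from package framework to package debug and now calls framework helpers explicitly, and the gatherer wiring is removed from framework.Framework's BeforeEach/AfterEach. As a rough sketch of how a test suite is presumably expected to opt back in, a blank import of the init package is enough, since its init() registers itself via framework.NewFrameworkExtensions; the exact import path and the suite package name below are assumptions inferred from "package init" in the diff, not stated in the commit itself.

// Illustrative suite file, not part of this commit. The import path is an
// assumption based on the package name shown in the diff.
package e2e

import (
    // Imported only for its side effects: the package's init() appends an
    // extension to framework.NewFrameworkExtensions, which re-enables
    // namespace dumping, log size verification, and resource gathering
    // for every framework.Framework instance.
    _ "k8s.io/kubernetes/test/e2e/framework/debug/init"
)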
@@ -15,35 +15,47 @@ limitations under the License.
 */
 
 // Package init sets debug.DumpAllNamespaceInfo as implementation in the framework
-// and enables log size verification.
+// and enables log size verification and resource gathering.
 package init
 
 import (
     "sync"
+    "time"
 
     "github.com/onsi/ginkgo/v2"
 
     "k8s.io/kubernetes/test/e2e/framework"
-    "k8s.io/kubernetes/test/e2e/framework/debug"
+    e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
 )
 
+var (
+    // TODO: this variable used to be a field in framework.Framework. It is
+    // not clear how it was ever set. https://grep.app/search?q=AddonResourceConstraints
+    // returns only the default initialization with an empty map. Perhaps it can be removed?
+
+    // Constraints that passed to a check which is executed after data is gathered to
+    // see if 99% of results are within acceptable bounds. It has to be injected in the test,
+    // as expectations vary greatly. Constraints are grouped by the container names.
+    AddonResourceConstraints map[string]e2edebug.ResourceConstraint
+)
+
 func init() {
     framework.NewFrameworkExtensions = append(framework.NewFrameworkExtensions,
         func(f *framework.Framework) {
             f.DumpAllNamespaceInfo = func(f *framework.Framework, ns string) {
-                debug.DumpAllNamespaceInfo(f.ClientSet, ns)
+                e2edebug.DumpAllNamespaceInfo(f.ClientSet, ns)
             }
 
             if framework.TestContext.GatherLogsSizes {
                 var (
                     wg           sync.WaitGroup
                     closeChannel chan bool
-                    verifier     *debug.LogsSizeVerifier
+                    verifier     *e2edebug.LogsSizeVerifier
                 )
 
                 ginkgo.BeforeEach(func() {
                     wg.Add(1)
                     closeChannel = make(chan bool)
-                    verifier = debug.NewLogsVerifier(f.ClientSet, closeChannel)
+                    verifier = e2edebug.NewLogsVerifier(f.ClientSet, closeChannel)
                     go func() {
                         defer wg.Done()
                         verifier.Run()
@@ -57,6 +69,44 @@ func init() {
                 })
             })
         }
 
+            if framework.TestContext.GatherKubeSystemResourceUsageData != "false" &&
+                framework.TestContext.GatherKubeSystemResourceUsageData != "none" {
+                ginkgo.BeforeEach(func() {
+                    var nodeMode e2edebug.NodesSet
+                    switch framework.TestContext.GatherKubeSystemResourceUsageData {
+                    case "master":
+                        nodeMode = e2edebug.MasterNodes
+                    case "masteranddns":
+                        nodeMode = e2edebug.MasterAndDNSNodes
+                    default:
+                        nodeMode = e2edebug.AllNodes
+                    }
+
+                    gatherer, err := e2edebug.NewResourceUsageGatherer(f.ClientSet, e2edebug.ResourceGathererOptions{
+                        InKubemark:                  framework.ProviderIs("kubemark"),
+                        Nodes:                       nodeMode,
+                        ResourceDataGatheringPeriod: 60 * time.Second,
+                        ProbeDuration:               15 * time.Second,
+                        PrintVerboseLogs:            false,
+                    }, nil)
+                    if err != nil {
+                        framework.Logf("Error while creating NewResourceUsageGatherer: %v", err)
+                        return
+                    }
+
+                    go gatherer.StartGatheringData()
+                    ginkgo.DeferCleanup(func() {
+                        ginkgo.By("Collecting resource usage data", func() {
+                            summary, resourceViolationError := gatherer.StopAndSummarize([]int{90, 99, 100}, AddonResourceConstraints)
+                            // Always record the summary, even if there was an error.
+                            f.TestSummaries = append(f.TestSummaries, summary)
+                            // Now fail if there was an error.
+                            framework.ExpectNoError(resourceViolationError)
+                        })
+                    })
+                })
+            }
         },
     )
 }
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package framework
+package debug
 
 import (
     "bufio"
@@ -38,7 +38,7 @@ import (
     clientset "k8s.io/client-go/kubernetes"
     kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
 
     // TODO: Remove the following imports (ref: https://github.com/kubernetes/kubernetes/issues/81245)
+    "k8s.io/kubernetes/test/e2e/framework"
     e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
 )
@@ -91,7 +91,7 @@ func (s *ResourceUsageSummary) PrintHumanReadable() string {
 
 // PrintJSON prints resource usage summary in JSON.
 func (s *ResourceUsageSummary) PrintJSON() string {
-    return PrettyPrintJSON(*s)
+    return framework.PrettyPrintJSON(*s)
 }
 
 // SummaryKind returns string of ResourceUsageSummary
@@ -198,13 +198,13 @@ func (w *resourceGatherWorker) singleProbe() {
     } else {
         nodeUsage, err := getOneTimeResourceUsageOnNode(w.c, w.nodeName, w.probeDuration, func() []string { return w.containerIDs })
         if err != nil {
-            Logf("Error while reading data from %v: %v", w.nodeName, err)
+            framework.Logf("Error while reading data from %v: %v", w.nodeName, err)
             return
         }
         for k, v := range nodeUsage {
             data[k] = v
             if w.printVerboseLogs {
-                Logf("Get container %v usage on node %v. CPUUsageInCores: %v, MemoryUsageInBytes: %v, MemoryWorkingSetInBytes: %v", k, w.nodeName, v.CPUUsageInCores, v.MemoryUsageInBytes, v.MemoryWorkingSetInBytes)
+                framework.Logf("Get container %v usage on node %v. CPUUsageInCores: %v, MemoryUsageInBytes: %v, MemoryWorkingSetInBytes: %v", k, w.nodeName, v.CPUUsageInCores, v.MemoryUsageInBytes, v.MemoryWorkingSetInBytes)
             }
         }
     }
@@ -290,13 +290,13 @@ func getOneTimeResourceUsageOnNode(
 
 // getStatsSummary contacts kubelet for the container information.
 func getStatsSummary(c clientset.Interface, nodeName string) (*kubeletstatsv1alpha1.Summary, error) {
-    ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout)
+    ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
     defer cancel()
 
     data, err := c.CoreV1().RESTClient().Get().
         Resource("nodes").
         SubResource("proxy").
-        Name(fmt.Sprintf("%v:%v", nodeName, KubeletPort)).
+        Name(fmt.Sprintf("%v:%v", nodeName, framework.KubeletPort)).
         Suffix("stats/summary").
         Do(ctx).Raw()
 
@@ -322,7 +322,7 @@ func removeUint64Ptr(ptr *uint64) uint64 {
 func (w *resourceGatherWorker) gather(initialSleep time.Duration) {
     defer utilruntime.HandleCrash()
     defer w.wg.Done()
-    defer Logf("Closing worker for %v", w.nodeName)
+    defer framework.Logf("Closing worker for %v", w.nodeName)
     defer func() { w.finished = true }()
     select {
     case <-time.After(initialSleep):
@@ -384,7 +384,7 @@ func nodeHasControlPlanePods(c clientset.Interface, nodeName string) (bool, erro
         return false, err
     }
     if len(podList.Items) < 1 {
-        Logf("Can't find any pods in namespace %s to grab metrics from", metav1.NamespaceSystem)
+        framework.Logf("Can't find any pods in namespace %s to grab metrics from", metav1.NamespaceSystem)
     }
     for _, pod := range podList.Items {
         if regKubeScheduler.MatchString(pod.Name) || regKubeControllerManager.MatchString(pod.Name) {
@@ -422,7 +422,7 @@ func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOpt
     if pods == nil {
         pods, err = c.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{})
         if err != nil {
-            Logf("Error while listing Pods: %v", err)
+            framework.Logf("Error while listing Pods: %v", err)
             return nil, err
         }
     }
@@ -458,7 +458,7 @@ func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOpt
     }
     nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
     if err != nil {
-        Logf("Error while listing Nodes: %v", err)
+        framework.Logf("Error while listing Nodes: %v", err)
         return nil, err
     }
 
@@ -510,7 +510,7 @@ func (g *ContainerResourceGatherer) StartGatheringData() {
 // specified resource constraints.
 func (g *ContainerResourceGatherer) StopAndSummarize(percentiles []int, constraints map[string]ResourceConstraint) (*ResourceUsageSummary, error) {
     close(g.stopCh)
-    Logf("Closed stop channel. Waiting for %v workers", len(g.workers))
+    framework.Logf("Closed stop channel. Waiting for %v workers", len(g.workers))
     finished := make(chan struct{}, 1)
     go func() {
         g.workerWg.Wait()
@@ -518,7 +518,7 @@ func (g *ContainerResourceGatherer) StopAndSummarize(percentiles []int, constrai
     }()
     select {
     case <-finished:
-        Logf("Waitgroup finished.")
+        framework.Logf("Waitgroup finished.")
     case <-time.After(2 * time.Minute):
         unfinished := make([]string, 0)
         for i := range g.workers {
@@ -526,11 +526,11 @@ func (g *ContainerResourceGatherer) StopAndSummarize(percentiles []int, constrai
                 unfinished = append(unfinished, g.workers[i].nodeName)
             }
         }
-        Logf("Timed out while waiting for waitgroup, some workers failed to finish: %v", unfinished)
+        framework.Logf("Timed out while waiting for waitgroup, some workers failed to finish: %v", unfinished)
     }
 
     if len(percentiles) == 0 {
-        Logf("Warning! Empty percentile list for stopAndPrintData.")
+        framework.Logf("Warning! Empty percentile list for stopAndPrintData.")
         return &ResourceUsageSummary{}, fmt.Errorf("Failed to get any resource usage data")
     }
     data := make(map[int]ResourceUsagePerContainer)
@@ -604,7 +604,7 @@ type kubemarkResourceUsage struct {
 }
 
 func getMasterUsageByPrefix(prefix string) (string, error) {
-    sshResult, err := e2essh.SSH(fmt.Sprintf("ps ax -o %%cpu,rss,command | tail -n +2 | grep %v | sed 's/\\s+/ /g'", prefix), APIAddress()+":22", TestContext.Provider)
+    sshResult, err := e2essh.SSH(fmt.Sprintf("ps ax -o %%cpu,rss,command | tail -n +2 | grep %v | sed 's/\\s+/ /g'", prefix), framework.APIAddress()+":22", framework.TestContext.Provider)
     if err != nil {
         return "", err
     }
@@ -617,7 +617,7 @@ func getKubemarkMasterComponentsResourceUsage() map[string]*kubemarkResourceUsag
     // Get kubernetes component resource usage
     sshResult, err := getMasterUsageByPrefix("kube")
     if err != nil {
-        Logf("Error when trying to SSH to master machine. Skipping probe. %v", err)
+        framework.Logf("Error when trying to SSH to master machine. Skipping probe. %v", err)
         return nil
     }
     scanner := bufio.NewScanner(strings.NewReader(sshResult))
@@ -635,7 +635,7 @@ func getKubemarkMasterComponentsResourceUsage() map[string]*kubemarkResourceUsag
     // Get etcd resource usage
     sshResult, err = getMasterUsageByPrefix("bin/etcd")
     if err != nil {
-        Logf("Error when trying to SSH to master machine. Skipping probe")
+        framework.Logf("Error when trying to SSH to master machine. Skipping probe")
         return nil
     }
     scanner = bufio.NewScanner(strings.NewReader(sshResult))
@@ -99,12 +99,6 @@ type Framework struct {
     NamespaceDeletionTimeout         time.Duration
     NamespacePodSecurityEnforceLevel admissionapi.Level // The pod security enforcement level for namespaces to be applied.
 
-    gatherer *ContainerResourceGatherer
-    // Constraints that passed to a check which is executed after data is gathered to
-    // see if 99% of results are within acceptable bounds. It has to be injected in the test,
-    // as expectations vary greatly. Constraints are grouped by the container names.
-    AddonResourceConstraints map[string]ResourceConstraint
-
     // Flaky operation failures in an e2e test can be captured through this.
     flakeReport *FlakeReport
 
@@ -164,7 +158,6 @@ func NewDefaultFramework(baseName string) *Framework {
 func NewFramework(baseName string, options Options, client clientset.Interface) *Framework {
     f := &Framework{
         BaseName:                 baseName,
-        AddonResourceConstraints: make(map[string]ResourceConstraint),
         Options:                  options,
         ClientSet:                client,
         Timeouts:                 NewTimeoutContextWithDefaults(),
@@ -256,32 +249,6 @@ func (f *Framework) BeforeEach() {
         f.UniqueName = fmt.Sprintf("%s-%08x", f.BaseName, rand.Int31())
     }
 
-    if TestContext.GatherKubeSystemResourceUsageData != "false" && TestContext.GatherKubeSystemResourceUsageData != "none" {
-        var err error
-        var nodeMode NodesSet
-        switch TestContext.GatherKubeSystemResourceUsageData {
-        case "master":
-            nodeMode = MasterNodes
-        case "masteranddns":
-            nodeMode = MasterAndDNSNodes
-        default:
-            nodeMode = AllNodes
-        }
-
-        f.gatherer, err = NewResourceUsageGatherer(f.ClientSet, ResourceGathererOptions{
-            InKubemark:                  ProviderIs("kubemark"),
-            Nodes:                       nodeMode,
-            ResourceDataGatheringPeriod: 60 * time.Second,
-            ProbeDuration:               15 * time.Second,
-            PrintVerboseLogs:            false,
-        }, nil)
-        if err != nil {
-            Logf("Error while creating NewResourceUsageGatherer: %v", err)
-        } else {
-            go f.gatherer.StartGatheringData()
-        }
-    }
-
     f.flakeReport = NewFlakeReport()
 }
 
@@ -393,13 +360,6 @@ func (f *Framework) AfterEach() {
         }
     }()
 
-    if TestContext.GatherKubeSystemResourceUsageData != "false" && TestContext.GatherKubeSystemResourceUsageData != "none" && f.gatherer != nil {
-        ginkgo.By("Collecting resource usage data")
-        summary, resourceViolationError := f.gatherer.StopAndSummarize([]int{90, 99, 100}, f.AddonResourceConstraints)
-        defer ExpectNoError(resourceViolationError)
-        f.TestSummaries = append(f.TestSummaries, summary)
-    }
-
     TestContext.CloudConfig.Provider.FrameworkAfterEach(f)
 
     // Report any flakes that were observed in the e2e test and reset.