refactor runWorkloads

This commit is contained in:
YamasouA 2025-01-26 19:39:38 +09:00
parent 8f8c94a04d
commit 659804b765

View File

@ -1450,6 +1450,40 @@ func stopCollectingMetrics(tCtx ktesting.TContext, collectorCtx ktesting.TContex
return dataItems return dataItems
} }
// MetricsCollectionData groups the state shared between the operations that
// start and stop metrics collection: the running collectors, the context and
// wait group they run under, and the throughput error margin passed to
// startCollectingMetrics.
type MetricsCollectionData struct {
// Collectors holds the collectors returned by the most recent
// startCollectingMetrics call; they are handed to stopCollectingMetrics.
Collectors []testDataCollector
// This needs a separate context and wait group because
// the metrics collecting needs to be sure that the goroutines
// are stopped.
//
// CollectorCtx is non-nil only while a collection is in progress: it is
// set from startCollectingMetrics and reset to nil after
// stopCollectingMetrics. A non-nil value when starting is reported as an
// overlapping collection.
CollectorCtx ktesting.TContext
// NOTE(review): runWorkload fills this with a fresh &sync.WaitGroup{} while
// deferring Wait on its own local collectorWG, so that deferred Wait does
// not cover goroutines registered here — confirm whether both should be the
// same wait group.
CollectorWG *sync.WaitGroup
// Disable error checking of the sampling interval length in the
// throughput collector by default. When running benchmarks, report
// it as test failure when samples are not taken regularly.
ThroughputErrorMargin float64
}
// WorkloadState is the mutable bookkeeping that the individual operations of
// one workload update as they run.
type WorkloadState struct {
// DataItems accumulates the items returned by stopCollectingMetrics.
// NOTE(review): runWorkload returns its own local dataItems slice, which no
// operation appends to, instead of this field — confirm the collected items
// are meant to be returned from here.
DataItems []DataItem
// NextNodeIndex is the start index passed to PrepareNodes by the next
// createNodesOp; it is advanced by the op's Count after each such op.
NextNodeIndex int
// NumPodsScheduledPerNamespace has all namespaces created in workload and the number of pods they (will) have.
// All namespaces listed in NumPodsScheduledPerNamespace will be cleaned up.
NumPodsScheduledPerNamespace map[string]int
}
// SharedOperationData bundles everything the per-operation helpers
// (runCreateNodesOp, runCreatePodsOp, ...) need from runWorkload. It is passed
// by pointer so that the helpers can update the shared state in place.
type SharedOperationData struct {
// Additional informers needed for testing. The pod informer was
// already created before (scheduler.NewInformerFactory) and the
// factory was started for it (mustSetupCluster), therefore we don't
// need to start again.
PodInformer coreinformers.PodInformer
// MetricsData is the state of the currently running metrics collection, if any.
MetricsData *MetricsCollectionData
// WorkloadState is the bookkeeping (collected data items, node index,
// per-namespace pod counts) updated by the operations.
WorkloadState *WorkloadState
// TCtx is the test context used by all operations for API calls,
// cancellation checks and failure reporting.
TCtx ktesting.TContext
// WG tracks the background goroutines started by operations that do not
// wait for completion (pod deletion, churn).
// NOTE(review): this is a sync.WaitGroup held by value and runWorkload
// initializes it with `WG: wg`, i.e. a copy of its local wait group. Add and
// Done calls made through this field are therefore not visible to a Wait on
// runWorkload's local wg, and `go vet` (copylocks) flags such copies —
// consider *sync.WaitGroup.
WG sync.WaitGroup
// CancelFunc is not set by the runWorkload initializer visible in this
// change and is not read by any of the operation helpers shown here.
// NOTE(review): confirm whether it is still needed.
CancelFunc context.CancelFunc
}
func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFactory informers.SharedInformerFactory) []DataItem { func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFactory informers.SharedInformerFactory) []DataItem {
b, benchmarking := tCtx.TB().(*testing.B) b, benchmarking := tCtx.TB().(*testing.B)
if benchmarking { if benchmarking {
@ -1463,9 +1497,6 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
}) })
} }
// Disable error checking of the sampling interval length in the
// throughput collector by default. When running benchmarks, report
// it as test failure when samples are not taken regularly.
var throughputErrorMargin float64 var throughputErrorMargin float64
if benchmarking { if benchmarking {
// TODO: To prevent the perf-test failure, we increased the error margin, if still not enough // TODO: To prevent the perf-test failure, we increased the error margin, if still not enough
@ -1473,12 +1504,6 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
throughputErrorMargin = 30 throughputErrorMargin = 30
} }
// Additional informers needed for testing. The pod informer was
// already created before (scheduler.NewInformerFactory) and the
// factory was started for it (mustSetupCluster), therefore we don't
// need to start again.
podInformer := informerFactory.Core().V1().Pods()
// Everything else started by this function gets stopped before it returns. // Everything else started by this function gets stopped before it returns.
tCtx = ktesting.WithCancel(tCtx) tCtx = ktesting.WithCancel(tCtx)
var wg sync.WaitGroup var wg sync.WaitGroup
@ -1486,111 +1511,122 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
defer tCtx.Cancel("workload is done") defer tCtx.Cancel("workload is done")
var dataItems []DataItem var dataItems []DataItem
nextNodeIndex := 0
// numPodsScheduledPerNamespace has all namespaces created in workload and the number of pods they (will) have.
// All namespaces listed in numPodsScheduledPerNamespace will be cleaned up.
numPodsScheduledPerNamespace := make(map[string]int)
var collectors []testDataCollector
// This needs a separate context and wait group because
// the metrics collecting needs to be sure that the goroutines
// are stopped.
var collectorCtx ktesting.TContext
var collectorWG sync.WaitGroup var collectorWG sync.WaitGroup
defer collectorWG.Wait() defer collectorWG.Wait()
for opIndex, op := range unrollWorkloadTemplate(tCtx, tc.WorkloadTemplate, w) { sharedOperationData := SharedOperationData{
realOp, err := op.realOp.patchParams(w) TCtx: tCtx,
if err != nil { WG: wg,
tCtx.Fatalf("op %d: %v", opIndex, err) MetricsData: &MetricsCollectionData{
CollectorWG: &sync.WaitGroup{},
ThroughputErrorMargin: throughputErrorMargin,
},
WorkloadState: &WorkloadState{
NumPodsScheduledPerNamespace: make(map[string]int),
},
PodInformer: informerFactory.Core().V1().Pods(),
} }
select {
case <-tCtx.Done():
tCtx.Fatalf("op %d: %v", opIndex, context.Cause(tCtx))
default:
}
switch concreteOp := realOp.(type) {
case *createNodesOp:
nodePreparer, err := getNodePreparer(fmt.Sprintf("node-%d-", opIndex), concreteOp, tCtx.Client())
if err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err)
}
if err := nodePreparer.PrepareNodes(tCtx, nextNodeIndex); err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err)
}
nextNodeIndex += concreteOp.Count
case *createNamespacesOp: for opIndex, op := range unrollWorkloadTemplate(tCtx, tc.WorkloadTemplate, w) {
nsPreparer, err := newNamespacePreparer(tCtx, concreteOp) runOperation(tc, opIndex, op, w, &sharedOperationData)
if err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err)
} }
if err := nsPreparer.prepare(tCtx); err != nil {
err2 := nsPreparer.cleanup(tCtx) // check unused params and inform users
unusedParams := w.unusedParams()
if len(unusedParams) != 0 {
tCtx.Fatalf("the parameters %v are defined on workload %s, but unused.\nPlease make sure there are no typos.", unusedParams, w.Name)
}
// Some tests have unschedulable pods. Do not add an implicit barrier at the
// end as we do not want to wait for them.
return dataItems
}
// runCreateNodesOp executes a createNodesOp. It obtains a node preparer whose
// name prefix is "node-<opIndex>-", prepares the nodes starting at the
// workload's current NextNodeIndex and then advances that index by the op's
// Count. Any error fails the test via Fatalf.
func runCreateNodesOp(opIndex int, concreteOp *createNodesOp, sharedOperationData *SharedOperationData) {
	tCtx := sharedOperationData.TCtx
	state := sharedOperationData.WorkloadState

	namePrefix := fmt.Sprintf("node-%d-", opIndex)
	preparer, err := getNodePreparer(namePrefix, concreteOp, tCtx.Client())
	if err != nil {
		tCtx.Fatalf("op %d: %v", opIndex, err)
	}

	err = preparer.PrepareNodes(tCtx, state.NextNodeIndex)
	if err != nil {
		tCtx.Fatalf("op %d: %v", opIndex, err)
	}

	state.NextNodeIndex += concreteOp.Count
}
func runCreateNamespacesOp(opIndex int, concreteOp *createNamespacesOp, sharedOperationData *SharedOperationData) {
nsPreparer, err := newNamespacePreparer(sharedOperationData.TCtx, concreteOp)
if err != nil {
sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
}
if err := nsPreparer.prepare(sharedOperationData.TCtx); err != nil {
err2 := nsPreparer.cleanup(sharedOperationData.TCtx)
if err2 != nil { if err2 != nil {
err = fmt.Errorf("prepare: %v; cleanup: %v", err, err2) err = fmt.Errorf("prepare: %v; cleanup: %v", err, err2)
} }
tCtx.Fatalf("op %d: %v", opIndex, err) sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
} }
for _, n := range nsPreparer.namespaces() { for _, n := range nsPreparer.namespaces() {
if _, ok := numPodsScheduledPerNamespace[n]; ok { if _, ok := sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace[n]; ok {
// this namespace has been already created. // this namespace has been already created.
continue continue
} }
numPodsScheduledPerNamespace[n] = 0 sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace[n] = 0
}
} }
case *createPodsOp: func runCreatePodsOp(tc *testCase, w *workload, opIndex int, concreteOp *createPodsOp, sharedOperationData *SharedOperationData) {
var namespace string var namespace string
// define Pod's namespace automatically, and create that namespace. // define Pod's namespace automatically, and create that namespace.
namespace = fmt.Sprintf("namespace-%d", opIndex) namespace = fmt.Sprintf("namespace-%d", opIndex)
if concreteOp.Namespace != nil { if concreteOp.Namespace != nil {
namespace = *concreteOp.Namespace namespace = *concreteOp.Namespace
} }
createNamespaceIfNotPresent(tCtx, namespace, &numPodsScheduledPerNamespace) createNamespaceIfNotPresent(sharedOperationData.TCtx, namespace, &sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace)
if concreteOp.PodTemplatePath == nil { if concreteOp.PodTemplatePath == nil {
concreteOp.PodTemplatePath = tc.DefaultPodTemplatePath concreteOp.PodTemplatePath = tc.DefaultPodTemplatePath
} }
if concreteOp.CollectMetrics { if concreteOp.CollectMetrics {
if collectorCtx != nil { if sharedOperationData.MetricsData.CollectorCtx != nil {
tCtx.Fatalf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex) sharedOperationData.TCtx.Fatalf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
} }
collectorCtx, collectors = startCollectingMetrics(tCtx, &collectorWG, podInformer, tc.MetricsCollectorConfig, throughputErrorMargin, opIndex, namespace, []string{namespace}, nil) sharedOperationData.MetricsData.CollectorCtx, sharedOperationData.MetricsData.Collectors = startCollectingMetrics(sharedOperationData.TCtx, sharedOperationData.MetricsData.CollectorWG, sharedOperationData.PodInformer, tc.MetricsCollectorConfig, sharedOperationData.MetricsData.ThroughputErrorMargin, opIndex, namespace, []string{namespace}, nil)
defer collectorCtx.Cancel("cleaning up") defer sharedOperationData.MetricsData.CollectorCtx.Cancel("cleaning up")
} }
if err := createPodsRapidly(tCtx, namespace, concreteOp); err != nil { if err := createPodsRapidly(sharedOperationData.TCtx, namespace, concreteOp); err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err) sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
} }
switch { switch {
case concreteOp.SkipWaitToCompletion: case concreteOp.SkipWaitToCompletion:
// Only record those namespaces that may potentially require barriers // Only record those namespaces that may potentially require barriers
// in the future. // in the future.
numPodsScheduledPerNamespace[namespace] += concreteOp.Count sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace[namespace] += concreteOp.Count
case concreteOp.SteadyState: case concreteOp.SteadyState:
if err := createPodsSteadily(tCtx, namespace, podInformer, concreteOp); err != nil { if err := createPodsSteadily(sharedOperationData.TCtx, namespace, sharedOperationData.PodInformer, concreteOp); err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err) sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
} }
default: default:
if err := waitUntilPodsScheduledInNamespace(tCtx, podInformer, nil, namespace, concreteOp.Count); err != nil { if err := waitUntilPodsScheduledInNamespace(sharedOperationData.TCtx, sharedOperationData.PodInformer, nil, namespace, concreteOp.Count); err != nil {
tCtx.Fatalf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err) sharedOperationData.TCtx.Fatalf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err)
} }
} }
if concreteOp.CollectMetrics { if concreteOp.CollectMetrics {
// CollectMetrics and SkipWaitToCompletion can never be true at the // CollectMetrics and SkipWaitToCompletion can never be true at the
// same time, so if we're here, it means that all pods have been // same time, so if we're here, it means that all pods have been
// scheduled. // scheduled.
items := stopCollectingMetrics(tCtx, collectorCtx, &collectorWG, w.Threshold, *w.ThresholdMetricSelector, opIndex, collectors) items := stopCollectingMetrics(sharedOperationData.TCtx, sharedOperationData.MetricsData.CollectorCtx, sharedOperationData.MetricsData.CollectorWG, w.Threshold, *w.ThresholdMetricSelector, opIndex, sharedOperationData.MetricsData.Collectors)
dataItems = append(dataItems, items...) sharedOperationData.WorkloadState.DataItems = append(sharedOperationData.WorkloadState.DataItems, items...)
collectorCtx = nil sharedOperationData.MetricsData.CollectorCtx = nil
}
} }
case *deletePodsOp: func runDeletePodsOp(opIndex int, concreteOp *deletePodsOp, sharedOperationData *SharedOperationData) {
labelSelector := labels.ValidatedSetSelector(concreteOp.LabelSelector) labelSelector := labels.ValidatedSetSelector(concreteOp.LabelSelector)
podsToDelete, err := podInformer.Lister().Pods(concreteOp.Namespace).List(labelSelector) podsToDelete, err := sharedOperationData.PodInformer.Lister().Pods(concreteOp.Namespace).List(labelSelector)
if err != nil { if err != nil {
tCtx.Fatalf("op %d: error in listing pods in the namespace %s: %v", opIndex, concreteOp.Namespace, err) sharedOperationData.TCtx.Fatalf("op %d: error in listing pods in the namespace %s: %v", opIndex, concreteOp.Namespace, err)
} }
deletePods := func(opIndex int) { deletePods := func(opIndex int) {
@ -1601,13 +1637,13 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
for i := 0; i < len(podsToDelete); i++ { for i := 0; i < len(podsToDelete); i++ {
select { select {
case <-ticker.C: case <-ticker.C:
if err := tCtx.Client().CoreV1().Pods(concreteOp.Namespace).Delete(tCtx, podsToDelete[i].Name, metav1.DeleteOptions{}); err != nil { if err := sharedOperationData.TCtx.Client().CoreV1().Pods(concreteOp.Namespace).Delete(sharedOperationData.TCtx, podsToDelete[i].Name, metav1.DeleteOptions{}); err != nil {
if errors.Is(err, context.Canceled) { if errors.Is(err, context.Canceled) {
return return
} }
tCtx.Errorf("op %d: unable to delete pod %v: %v", opIndex, podsToDelete[i].Name, err) sharedOperationData.TCtx.Errorf("op %d: unable to delete pod %v: %v", opIndex, podsToDelete[i].Name, err)
} }
case <-tCtx.Done(): case <-sharedOperationData.TCtx.Done():
return return
} }
} }
@ -1616,36 +1652,37 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
listOpts := metav1.ListOptions{ listOpts := metav1.ListOptions{
LabelSelector: labelSelector.String(), LabelSelector: labelSelector.String(),
} }
if err := tCtx.Client().CoreV1().Pods(concreteOp.Namespace).DeleteCollection(tCtx, metav1.DeleteOptions{}, listOpts); err != nil { if err := sharedOperationData.TCtx.Client().CoreV1().Pods(concreteOp.Namespace).DeleteCollection(sharedOperationData.TCtx, metav1.DeleteOptions{}, listOpts); err != nil {
if errors.Is(err, context.Canceled) { if errors.Is(err, context.Canceled) {
return return
} }
tCtx.Errorf("op %d: unable to delete pods in namespace %v: %v", opIndex, concreteOp.Namespace, err) sharedOperationData.TCtx.Errorf("op %d: unable to delete pods in namespace %v: %v", opIndex, concreteOp.Namespace, err)
} }
} }
if concreteOp.SkipWaitToCompletion { if concreteOp.SkipWaitToCompletion {
wg.Add(1) sharedOperationData.WG.Add(1)
go func(opIndex int) { go func(opIndex int) {
defer wg.Done() defer sharedOperationData.WG.Done()
deletePods(opIndex) deletePods(opIndex)
}(opIndex) }(opIndex)
} else { } else {
deletePods(opIndex) deletePods(opIndex)
} }
}
case *churnOp: func runChurnOp(opIndex int, concreteOp *churnOp, sharedOperationData *SharedOperationData) {
var namespace string var namespace string
if concreteOp.Namespace != nil { if concreteOp.Namespace != nil {
namespace = *concreteOp.Namespace namespace = *concreteOp.Namespace
} else { } else {
namespace = fmt.Sprintf("namespace-%d", opIndex) namespace = fmt.Sprintf("namespace-%d", opIndex)
} }
restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cacheddiscovery.NewMemCacheClient(tCtx.Client().Discovery())) restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cacheddiscovery.NewMemCacheClient(sharedOperationData.TCtx.Client().Discovery()))
// Ensure the namespace exists. // Ensure the namespace exists.
nsObj := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}} nsObj := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}
if _, err := tCtx.Client().CoreV1().Namespaces().Create(tCtx, nsObj, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) { if _, err := sharedOperationData.TCtx.Client().CoreV1().Namespaces().Create(sharedOperationData.TCtx, nsObj, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
tCtx.Fatalf("op %d: unable to create namespace %v: %v", opIndex, namespace, err) sharedOperationData.TCtx.Fatalf("op %d: unable to create namespace %v: %v", opIndex, namespace, err)
} }
var churnFns []func(name string) string var churnFns []func(name string) string
@ -1653,31 +1690,31 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
for i, path := range concreteOp.TemplatePaths { for i, path := range concreteOp.TemplatePaths {
unstructuredObj, gvk, err := getUnstructuredFromFile(path) unstructuredObj, gvk, err := getUnstructuredFromFile(path)
if err != nil { if err != nil {
tCtx.Fatalf("op %d: unable to parse the %v-th template path: %v", opIndex, i, err) sharedOperationData.TCtx.Fatalf("op %d: unable to parse the %v-th template path: %v", opIndex, i, err)
} }
// Obtain GVR. // Obtain GVR.
mapping, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version) mapping, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version)
if err != nil { if err != nil {
tCtx.Fatalf("op %d: unable to find GVR for %v: %v", opIndex, gvk, err) sharedOperationData.TCtx.Fatalf("op %d: unable to find GVR for %v: %v", opIndex, gvk, err)
} }
gvr := mapping.Resource gvr := mapping.Resource
// Distinguish cluster-scoped with namespaced API objects. // Distinguish cluster-scoped with namespaced API objects.
var dynRes dynamic.ResourceInterface var dynRes dynamic.ResourceInterface
if mapping.Scope.Name() == meta.RESTScopeNameNamespace { if mapping.Scope.Name() == meta.RESTScopeNameNamespace {
dynRes = tCtx.Dynamic().Resource(gvr).Namespace(namespace) dynRes = sharedOperationData.TCtx.Dynamic().Resource(gvr).Namespace(namespace)
} else { } else {
dynRes = tCtx.Dynamic().Resource(gvr) dynRes = sharedOperationData.TCtx.Dynamic().Resource(gvr)
} }
churnFns = append(churnFns, func(name string) string { churnFns = append(churnFns, func(name string) string {
if name != "" { if name != "" {
if err := dynRes.Delete(tCtx, name, metav1.DeleteOptions{}); err != nil && !errors.Is(err, context.Canceled) { if err := dynRes.Delete(sharedOperationData.TCtx, name, metav1.DeleteOptions{}); err != nil && !errors.Is(err, context.Canceled) {
tCtx.Errorf("op %d: unable to delete %v: %v", opIndex, name, err) sharedOperationData.TCtx.Errorf("op %d: unable to delete %v: %v", opIndex, name, err)
} }
return "" return ""
} }
live, err := dynRes.Create(tCtx, unstructuredObj, metav1.CreateOptions{}) live, err := dynRes.Create(sharedOperationData.TCtx, unstructuredObj, metav1.CreateOptions{})
if err != nil { if err != nil {
return "" return ""
} }
@ -1694,9 +1731,9 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
switch concreteOp.Mode { switch concreteOp.Mode {
case Create: case Create:
wg.Add(1) sharedOperationData.WG.Add(1)
go func() { go func() {
defer wg.Done() defer sharedOperationData.WG.Done()
count, threshold := 0, concreteOp.Number count, threshold := 0, concreteOp.Number
if threshold == 0 { if threshold == 0 {
threshold = math.MaxInt32 threshold = math.MaxInt32
@ -1708,15 +1745,15 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
churnFns[i]("") churnFns[i]("")
} }
count++ count++
case <-tCtx.Done(): case <-sharedOperationData.TCtx.Done():
return return
} }
} }
}() }()
case Recreate: case Recreate:
wg.Add(1) sharedOperationData.WG.Add(1)
go func() { go func() {
defer wg.Done() defer sharedOperationData.WG.Done()
retVals := make([][]string, len(churnFns)) retVals := make([][]string, len(churnFns))
// For each churn function, instantiate a slice of strings with length "concreteOp.Number". // For each churn function, instantiate a slice of strings with length "concreteOp.Number".
for i := range retVals { for i := range retVals {
@ -1731,81 +1768,109 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
retVals[i][count%concreteOp.Number] = churnFns[i](retVals[i][count%concreteOp.Number]) retVals[i][count%concreteOp.Number] = churnFns[i](retVals[i][count%concreteOp.Number])
} }
count++ count++
case <-tCtx.Done(): case <-sharedOperationData.TCtx.Done():
return return
} }
} }
}() }()
} }
}
case *barrierOp: func runBarrierOp(opIndex int, concreteOp *barrierOp, sharedOperationData *SharedOperationData) {
for _, namespace := range concreteOp.Namespaces { for _, namespace := range concreteOp.Namespaces {
if _, ok := numPodsScheduledPerNamespace[namespace]; !ok { if _, ok := sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace[namespace]; !ok {
tCtx.Fatalf("op %d: unknown namespace %s", opIndex, namespace) sharedOperationData.TCtx.Fatalf("op %d: unknown namespace %s", opIndex, namespace)
} }
} }
switch concreteOp.StageRequirement { switch concreteOp.StageRequirement {
case Attempted: case Attempted:
if err := waitUntilPodsAttempted(tCtx, podInformer, concreteOp.LabelSelector, concreteOp.Namespaces, numPodsScheduledPerNamespace); err != nil { if err := waitUntilPodsAttempted(sharedOperationData.TCtx, sharedOperationData.PodInformer, concreteOp.LabelSelector, concreteOp.Namespaces, sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace); err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err) sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
} }
case Scheduled: case Scheduled:
// Default should be treated like "Scheduled", so handling both in the same way. // Default should be treated like "Scheduled", so handling both in the same way.
fallthrough fallthrough
default: default:
if err := waitUntilPodsScheduled(tCtx, podInformer, concreteOp.LabelSelector, concreteOp.Namespaces, numPodsScheduledPerNamespace); err != nil { if err := waitUntilPodsScheduled(sharedOperationData.TCtx, sharedOperationData.PodInformer, concreteOp.LabelSelector, concreteOp.Namespaces, sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace); err != nil {
tCtx.Fatalf("op %d: %v", opIndex, err) sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
} }
// At the end of the barrier, we can be sure that there are no pods // At the end of the barrier, we can be sure that there are no pods
// pending scheduling in the namespaces that we just blocked on. // pending scheduling in the namespaces that we just blocked on.
if len(concreteOp.Namespaces) == 0 { if len(concreteOp.Namespaces) == 0 {
numPodsScheduledPerNamespace = make(map[string]int) sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace = make(map[string]int)
} else { } else {
for _, namespace := range concreteOp.Namespaces { for _, namespace := range concreteOp.Namespaces {
delete(numPodsScheduledPerNamespace, namespace) delete(sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace, namespace)
}
} }
} }
} }
case *sleepOp: func runSleepOp(concreteOp *sleepOp, sharedOperationData *SharedOperationData) {
select { select {
case <-tCtx.Done(): case <-sharedOperationData.TCtx.Done():
case <-time.After(concreteOp.Duration.Duration): case <-time.After(concreteOp.Duration.Duration):
} }
case *startCollectingMetricsOp:
if collectorCtx != nil {
tCtx.Fatalf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
} }
collectorCtx, collectors = startCollectingMetrics(tCtx, &collectorWG, podInformer, tc.MetricsCollectorConfig, throughputErrorMargin, opIndex, concreteOp.Name, concreteOp.Namespaces, concreteOp.LabelSelector)
defer collectorCtx.Cancel("cleaning up")
case *stopCollectingMetricsOp: func runStartCollectingMetricsOp(opIndex int, tc *testCase, concreteOp *startCollectingMetricsOp, sharedOperationData *SharedOperationData) {
items := stopCollectingMetrics(tCtx, collectorCtx, &collectorWG, w.Threshold, *w.ThresholdMetricSelector, opIndex, collectors) if sharedOperationData.MetricsData.CollectorCtx != nil {
dataItems = append(dataItems, items...) sharedOperationData.TCtx.Fatalf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
collectorCtx = nil }
sharedOperationData.MetricsData.CollectorCtx, sharedOperationData.MetricsData.Collectors = startCollectingMetrics(sharedOperationData.TCtx, sharedOperationData.MetricsData.CollectorWG, sharedOperationData.PodInformer, tc.MetricsCollectorConfig, sharedOperationData.MetricsData.ThroughputErrorMargin, opIndex, concreteOp.Name, concreteOp.Namespaces, concreteOp.LabelSelector)
default: defer sharedOperationData.MetricsData.CollectorCtx.Cancel("cleaning up")
}
// runStopCollectingMetricsOp executes a stopCollectingMetricsOp: it stops the
// collection recorded in sharedOperationData.MetricsData, appends the
// resulting items to the workload's DataItems and marks the collection as
// finished by resetting CollectorCtx to nil.
func runStopCollectingMetricsOp(opIndex int, w *workload, sharedOperationData *SharedOperationData) {
	metrics := sharedOperationData.MetricsData
	state := sharedOperationData.WorkloadState

	collected := stopCollectingMetrics(
		sharedOperationData.TCtx,
		metrics.CollectorCtx,
		metrics.CollectorWG,
		w.Threshold,
		*w.ThresholdMetricSelector,
		opIndex,
		metrics.Collectors,
	)
	state.DataItems = append(state.DataItems, collected...)
	metrics.CollectorCtx = nil
}
func runDefault(opIndex int, concreteOp realOp, sharedOperationData *SharedOperationData) {
runable, ok := concreteOp.(runnableOp) runable, ok := concreteOp.(runnableOp)
if !ok { if !ok {
tCtx.Fatalf("op %d: invalid op %v", opIndex, concreteOp) sharedOperationData.TCtx.Fatalf("op %d: invalid op %v", opIndex, concreteOp)
} }
for _, namespace := range runable.requiredNamespaces() { for _, namespace := range runable.requiredNamespaces() {
createNamespaceIfNotPresent(tCtx, namespace, &numPodsScheduledPerNamespace) createNamespaceIfNotPresent(sharedOperationData.TCtx, namespace, &sharedOperationData.WorkloadState.NumPodsScheduledPerNamespace)
}
runable.run(tCtx)
} }
runable.run(sharedOperationData.TCtx)
} }
// check unused params and inform users func runOperation(tc *testCase, opIndex int, op op, w *workload, sharedOperationData *SharedOperationData) {
unusedParams := w.unusedParams() realOp, err := op.realOp.patchParams(w)
if len(unusedParams) != 0 { if err != nil {
tCtx.Fatalf("the parameters %v are defined on workload %s, but unused.\nPlease make sure there are no typos.", unusedParams, w.Name) sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, err)
}
select {
case <-sharedOperationData.TCtx.Done():
sharedOperationData.TCtx.Fatalf("op %d: %v", opIndex, context.Cause(sharedOperationData.TCtx))
default:
}
switch concreteOp := realOp.(type) {
case *createNodesOp:
runCreateNodesOp(opIndex, concreteOp, sharedOperationData)
case *createNamespacesOp:
runCreateNamespacesOp(opIndex, concreteOp, sharedOperationData)
case *createPodsOp:
runCreatePodsOp(tc, w, opIndex, concreteOp, sharedOperationData)
case *deletePodsOp:
runDeletePodsOp(opIndex, concreteOp, sharedOperationData)
case *churnOp:
runChurnOp(opIndex, concreteOp, sharedOperationData)
case *barrierOp:
runBarrierOp(opIndex, concreteOp, sharedOperationData)
case *sleepOp:
runSleepOp(concreteOp, sharedOperationData)
case *startCollectingMetricsOp:
runStartCollectingMetricsOp(opIndex, tc, concreteOp, sharedOperationData)
case *stopCollectingMetricsOp:
runStopCollectingMetricsOp(opIndex, w, sharedOperationData)
default:
runDefault(opIndex, concreteOp, sharedOperationData)
} }
// Some tests have unschedulable pods. Do not add an implicit barrier at the
// end as we do not want to wait for them.
return dataItems
} }
func createNamespaceIfNotPresent(tCtx ktesting.TContext, namespace string, podsPerNamespace *map[string]int) { func createNamespaceIfNotPresent(tCtx ktesting.TContext, namespace string, podsPerNamespace *map[string]int) {