Add Job e2e for tracking failure count per index (#130390)

* Add Job e2e for tracking failure count per index
* Review remarks
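For orientation, here is a minimal sketch (not part of this commit) of the Job spec shape the new test exercises; it assumes the upstream batch/v1 types and the k8s.io/utils/ptr helper:

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	"k8s.io/utils/ptr"
)

// backoffPerIndexJobSpec builds an Indexed Job spec in which each index may
// fail at most once (BackoffLimitPerIndex=1) before that index is marked
// failed. The Job-level BackoffLimit is left unset, as required when
// BackoffLimitPerIndex is used.
func backoffPerIndexJobSpec(parallelism, completions int32) batchv1.JobSpec {
	return batchv1.JobSpec{
		CompletionMode:       ptr.To(batchv1.IndexedCompletion),
		Parallelism:          ptr.To(parallelism),
		Completions:          ptr.To(completions),
		BackoffLimitPerIndex: ptr.To[int32](1),
	}
}

func main() {
	spec := backoffPerIndexJobSpec(2, 2)
	fmt.Println(*spec.BackoffLimitPerIndex) // 1
}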
@@ -660,6 +660,57 @@ done`}
			gomega.Expect(job.Status.Failed).Should(gomega.Equal(int32(1)))
		})

		/*
			Testname: Track the failure count per index in Pod annotation when backoffLimitPerIndex is used
			Description: Create an indexed job and ensure that the Pods are
			re-created with the failure-count Pod annotation set properly to
			indicate the number of so-far failures per index.
		*/
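		// The tracker below keys Pods by batchv1.JobCompletionIndexAnnotation
		// ("batch.kubernetes.io/job-completion-index") and records
		// batchv1.JobIndexFailureCountAnnotation
		// ("batch.kubernetes.io/job-index-failure-count") per index.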
ginkgo.It("should record the failure-count in the Pod annotation when using backoffLimitPerIndex", func(ctx context.Context) {
|
||||||
|
jobName := "e2e-backofflimitperindex-" + utilrand.String(5)
|
||||||
|
label := map[string]string{batchv1.JobNameLabel: jobName}
|
||||||
|
labelSelector := labels.SelectorFromSet(label).String()
|
||||||
|
|
||||||
|
parallelism := int32(2)
|
||||||
|
completions := int32(2)
|
||||||
|
backoffLimit := int32(6) // default value
|
||||||
|
|
||||||
|
job := e2ejob.NewTestJob("fail", jobName, v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit)
|
||||||
|
job.Spec.BackoffLimit = nil
|
||||||
|
job.Spec.BackoffLimitPerIndex = ptr.To[int32](1)
|
||||||
|
job.Spec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
|
||||||
|
|
||||||
|
tracker := NewIndexedPodAnnotationTracker(jobName, f.Namespace.Name, labelSelector, batchv1.JobCompletionIndexAnnotation, batchv1.JobIndexFailureCountAnnotation)
|
||||||
|
trackerCancel := tracker.Start(ctx, f.ClientSet)
|
||||||
|
ginkgo.DeferCleanup(trackerCancel)
|
||||||
|
|
||||||
|
ginkgo.By("Creating an indexed job with backoffLimit per index and failing pods")
|
||||||
|
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
|
||||||
|
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
|
||||||
|
|
||||||
|
ginkgo.By("Awaiting for the job to fail as there are failed indexes")
|
||||||
|
err = e2ejob.WaitForJobFailed(ctx, f.ClientSet, f.Namespace.Name, job.Name)
|
||||||
|
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
|
||||||
|
|
||||||
|
ginkgo.By("Verify the failure-count annotation on Pods")
|
||||||
|
// Since the Job is already failed all the relevant Pod events are
|
||||||
|
// already being distributed. Still, there might be a little bit of lag
|
||||||
|
// between the events being receiced by the Job controller and the test
|
||||||
|
// code so we need to wait a little bit.
|
||||||
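			// With parallelism=2, completions=2 and backoffLimitPerIndex=1,
			// each index gets two pods: the first is created with
			// failure-count "0" and, after it fails, a replacement is created
			// with failure-count "1". Hence the expected map below.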
			gomega.Eventually(ctx, tracker.cloneTrackedAnnotations).
				WithTimeout(15 * time.Second).
				WithPolling(500 * time.Millisecond).
				Should(gomega.Equal(map[int][]string{0: {"0", "1"}, 1: {"0", "1"}}))

			ginkgo.By("Verifying the Job status fields")
			job, err = e2ejob.GetJob(ctx, f.ClientSet, f.Namespace.Name, job.Name)
			framework.ExpectNoError(err, "failed to retrieve latest job object")
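			// Both indexes exhaust backoffLimitPerIndex=1, so FailedIndexes
			// lists them both, no index completes, and the total number of
			// failed pods is 2 indexes x 2 pods = 4.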
			gomega.Expect(job.Status.FailedIndexes).Should(gomega.HaveValue(gomega.Equal("0,1")))
			gomega.Expect(job.Status.CompletedIndexes).Should(gomega.Equal(""))
			gomega.Expect(job.Status.Failed).Should(gomega.Equal(int32(4)))
			gomega.Expect(job.Status.Succeeded).Should(gomega.Equal(int32(0)))
		})

		/*
			Testcase: Mark indexes as failed when the FailIndex action is matched in podFailurePolicy
			Description: Create an indexed job with backoffLimitPerIndex, and podFailurePolicy
test/e2e/apps/util.go (new file, 114 lines)
@@ -0,0 +1,114 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apps

import (
	"context"
	"maps"
	"strconv"
	"sync"

	"github.com/onsi/ginkgo/v2"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/test/e2e/framework"
)

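// IndexedPodAnnotationTracker watches the Pods of an indexed Job and records,
// for each completion index, every value observed for a chosen annotation,
// in the order the Pods are seen.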
type IndexedPodAnnotationTracker struct {
	sync.Mutex
	ownerName            string
	ownerNs              string
	labelSelector        string
	podIndexAnnotation   string
	podTrackedAnnotation string
	trackedAnnotations   map[int][]string
}

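// NewIndexedPodAnnotationTracker returns a tracker for the Pods of the named
// owner (a Job) in the given namespace: podIndexAnnotation identifies each
// Pod's completion index, and podTrackedAnnotation is the annotation whose
// values are recorded per index.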
func NewIndexedPodAnnotationTracker(ownerName, ownerNs, labelSelector, podIndexAnnotation, podTrackedAnnotation string) *IndexedPodAnnotationTracker {
	return &IndexedPodAnnotationTracker{
		ownerName:            ownerName,
		ownerNs:              ownerNs,
		labelSelector:        labelSelector,
		podIndexAnnotation:   podIndexAnnotation,
		podTrackedAnnotation: podTrackedAnnotation,
		trackedAnnotations:   make(map[int][]string),
	}
}

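// Start runs a Pod informer, filtered by the tracker's label selector, until
// the returned cancel function is called. Each observed Pod Add appends the
// tracked annotation's value under the Pod's completion index; Updates only
// assert that the tracked annotation is never mutated in place.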
func (t *IndexedPodAnnotationTracker) Start(ctx context.Context, c clientset.Interface) context.CancelFunc {
	trackerCtx, trackerCancel := context.WithCancel(ctx)
	_, podTracker := cache.NewInformerWithOptions(cache.InformerOptions{
		ListerWatcher: &cache.ListWatch{
			ListWithContextFunc: func(ctx context.Context, options metav1.ListOptions) (runtime.Object, error) {
				options.LabelSelector = t.labelSelector
				obj, err := c.CoreV1().Pods(t.ownerNs).List(ctx, options)
				return runtime.Object(obj), err
			},
			WatchFuncWithContext: func(ctx context.Context, options metav1.ListOptions) (watch.Interface, error) {
				options.LabelSelector = t.labelSelector
				return c.CoreV1().Pods(t.ownerNs).Watch(ctx, options)
			},
		},
		ObjectType: &v1.Pod{},
		Handler: cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				defer ginkgo.GinkgoRecover()
				if pod, ok := obj.(*v1.Pod); ok {
					framework.Logf("Observed event for Pod %q with index=%v, annotation value=%v",
						klog.KObj(pod), pod.Annotations[t.podIndexAnnotation], pod.Annotations[t.podTrackedAnnotation])
					podIndex, err := strconv.Atoi(pod.Annotations[t.podIndexAnnotation])
					if err != nil {
						framework.Failf("failed to parse pod index for Pod %q: %v", klog.KObj(pod), err.Error())
					} else {
						t.Lock()
						defer t.Unlock()
						t.trackedAnnotations[podIndex] = append(t.trackedAnnotations[podIndex], pod.Annotations[t.podTrackedAnnotation])
					}
				}
			},
			UpdateFunc: func(old, new interface{}) {
				defer ginkgo.GinkgoRecover()
				oldPod, oldOk := old.(*v1.Pod)
				newPod, newOk := new.(*v1.Pod)
				if !oldOk || !newOk {
					return
				}
				if oldPod.Annotations[t.podTrackedAnnotation] != newPod.Annotations[t.podTrackedAnnotation] {
					framework.Failf("Unexpected mutation of the annotation %q for Pod %q, old=%q, new=%q",
						t.podTrackedAnnotation,
						klog.KObj(newPod),
						oldPod.Annotations[t.podTrackedAnnotation],
						newPod.Annotations[t.podTrackedAnnotation],
					)
				}
			},
		},
	})
	go podTracker.RunWithContext(trackerCtx)
	return trackerCancel
}

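// cloneTrackedAnnotations returns a shallow copy of the accumulated
// per-index annotation values, taken under the lock so it is safe to call
// concurrently with the informer.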
func (t *IndexedPodAnnotationTracker) cloneTrackedAnnotations() map[int][]string {
	t.Lock()
	defer t.Unlock()
	return maps.Clone(t.trackedAnnotations)
}