Add Job.status.uncountedPodUIDs

For tracking Job Pods that have finished but are not yet counted as failed or succeeded

And feature gate JobTrackingWithFinalizers

Change-Id: I3e080f3ec090922640384b692e88eaf9a544d3b5
This commit is contained in:
Aldo Culquicondor
2021-01-18 13:36:03 -05:00
parent 657c6fe033
commit bb56a0bd04
25 changed files with 1183 additions and 192 deletions

View File

@@ -20,10 +20,10 @@ import (
"fmt"
"github.com/robfig/cron/v3"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
apimachineryvalidation "k8s.io/apimachinery/pkg/util/validation"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/kubernetes/pkg/apis/batch"
@@ -83,11 +83,14 @@ func ValidateGeneratedSelector(obj *batch.Job) field.ErrorList {
}
// ValidateJob validates a Job and returns an ErrorList with any errors.
func ValidateJob(job *batch.Job, opts apivalidation.PodValidationOptions) field.ErrorList {
func ValidateJob(job *batch.Job, opts JobValidationOptions) field.ErrorList {
// Jobs and rcs have the same name validation
allErrs := apivalidation.ValidateObjectMeta(&job.ObjectMeta, true, apivalidation.ValidateReplicationControllerName, field.NewPath("metadata"))
allErrs = append(allErrs, ValidateGeneratedSelector(job)...)
allErrs = append(allErrs, ValidateJobSpec(&job.Spec, field.NewPath("spec"), opts)...)
allErrs = append(allErrs, ValidateJobSpec(&job.Spec, field.NewPath("spec"), opts.PodValidationOptions)...)
if !opts.AllowTrackingAnnotation && hasJobTrackingAnnotation(job) {
allErrs = append(allErrs, field.Forbidden(field.NewPath("metadata").Child("annotations").Key(batch.JobTrackingFinalizer), "cannot add this annotation"))
}
if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion && job.Spec.Completions != nil && *job.Spec.Completions > 0 {
// For indexed job, the job controller appends a suffix (`-$INDEX`)
// to the pod hostname when indexed job create pods.
@@ -101,6 +104,14 @@ func ValidateJob(job *batch.Job, opts apivalidation.PodValidationOptions) field.
return allErrs
}
func hasJobTrackingAnnotation(job *batch.Job) bool {
if job.Annotations == nil {
return false
}
_, ok := job.Annotations[batch.JobTrackingFinalizer]
return ok
}
// ValidateJobSpec validates a JobSpec and returns an ErrorList with any errors.
func ValidateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidation.PodValidationOptions) field.ErrorList {
allErrs := validateJobSpec(spec, fldPath, opts)
@@ -169,12 +180,36 @@ func validateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidatio
return allErrs
}
// ValidateJobStatus validates a JobStatus and returns an ErrorList with any errors.
func ValidateJobStatus(status *batch.JobStatus, fldPath *field.Path) field.ErrorList {
// validateJobStatus validates a JobStatus and returns an ErrorList with any errors.
func validateJobStatus(status *batch.JobStatus, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.Active), fldPath.Child("active"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.Succeeded), fldPath.Child("succeeded"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.Failed), fldPath.Child("failed"))...)
if status.UncountedTerminatedPods != nil {
path := fldPath.Child("uncountedTerminatedPods")
seen := sets.NewString()
for i, k := range status.UncountedTerminatedPods.Succeeded {
p := path.Child("succeeded").Index(i)
if k == "" {
allErrs = append(allErrs, field.Invalid(p, k, "must not be empty"))
} else if seen.Has(string(k)) {
allErrs = append(allErrs, field.Duplicate(p, k))
} else {
seen.Insert(string(k))
}
}
for i, k := range status.UncountedTerminatedPods.Failed {
p := path.Child("failed").Index(i)
if k == "" {
allErrs = append(allErrs, field.Invalid(p, k, "must not be empty"))
} else if seen.Has(string(k)) {
allErrs = append(allErrs, field.Duplicate(p, k))
} else {
seen.Insert(string(k))
}
}
}
return allErrs
}
@@ -206,7 +241,7 @@ func ValidateJobSpecUpdate(spec, oldSpec batch.JobSpec, fldPath *field.Path, opt
// ValidateJobStatusUpdate validates an update to a JobStatus and returns an ErrorList with any errors.
func ValidateJobStatusUpdate(status, oldStatus batch.JobStatus) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, ValidateJobStatus(&status, field.NewPath("status"))...)
allErrs = append(allErrs, validateJobStatus(&status, field.NewPath("status"))...)
return allErrs
}
@@ -306,3 +341,9 @@ func ValidateJobTemplateSpec(spec *batch.JobTemplateSpec, fldPath *field.Path, o
}
return allErrs
}
type JobValidationOptions struct {
apivalidation.PodValidationOptions
// Allow Job to have the annotation batch.kubernetes.io/job-tracking
AllowTrackingAnnotation bool
}

View File

@@ -33,6 +33,8 @@ import (
"k8s.io/utils/pointer"
)
var ignoreErrValueDetail = cmpopts.IgnoreFields(field.Error{}, "BadValue", "Detail")
func getValidManualSelector() *metav1.LabelSelector {
return &metav1.LabelSelector{
MatchLabels: map[string]string{"a": "b"},
@@ -77,60 +79,91 @@ func TestValidateJob(t *testing.T) {
validGeneratedSelector := getValidGeneratedSelector()
validPodTemplateSpecForGenerated := getValidPodTemplateSpecForGenerated(validGeneratedSelector)
successCases := map[string]batch.Job{
successCases := map[string]struct {
opts JobValidationOptions
job batch.Job
}{
"valid manual selector": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validManualSelector,
ManualSelector: pointer.BoolPtr(true),
Template: validPodTemplateSpecForManual,
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
Annotations: map[string]string{"foo": "bar"},
},
Spec: batch.JobSpec{
Selector: validManualSelector,
ManualSelector: pointer.BoolPtr(true),
Template: validPodTemplateSpecForManual,
},
},
},
"valid generated selector": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
},
},
},
"valid NonIndexed completion mode": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.NonIndexedCompletion),
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.NonIndexedCompletion),
},
},
},
"valid Indexed completion mode": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.IndexedCompletion),
Completions: pointer.Int32Ptr(2),
Parallelism: pointer.Int32Ptr(100000),
},
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.IndexedCompletion),
Completions: pointer.Int32Ptr(2),
Parallelism: pointer.Int32Ptr(100000),
},
"valid job tracking annotation": {
opts: JobValidationOptions{
AllowTrackingAnnotation: true,
},
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
Annotations: map[string]string{
batch.JobTrackingFinalizer: "",
},
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
},
},
},
}
for k, v := range successCases {
t.Run(k, func(t *testing.T) {
if errs := ValidateJob(&v, corevalidation.PodValidationOptions{}); len(errs) != 0 {
if errs := ValidateJob(&v.job, v.opts); len(errs) != 0 {
t.Errorf("Got unexpected validation errors: %v", errs)
}
})
@@ -306,11 +339,25 @@ func TestValidateJob(t *testing.T) {
Parallelism: pointer.Int32Ptr(100001),
},
},
"metadata.annotations[batch.kubernetes.io/job-tracking]: cannot add this annotation": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
Annotations: map[string]string{
batch.JobTrackingFinalizer: "",
},
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
},
},
}
for k, v := range errorCases {
t.Run(k, func(t *testing.T) {
errs := ValidateJob(&v, corevalidation.PodValidationOptions{})
errs := ValidateJob(&v, JobValidationOptions{})
if len(errs) == 0 {
t.Errorf("expected failure for %s", k)
} else {
@@ -436,15 +483,18 @@ func TestValidateJobUpdate(t *testing.T) {
}
func TestValidateJobUpdateStatus(t *testing.T) {
type testcase struct {
old batch.Job
update batch.Job
}
successCases := []testcase{
{
cases := map[string]struct {
old batch.Job
update batch.Job
wantErrs field.ErrorList
}{
"valid": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "1",
},
Status: batch.JobStatus{
Active: 1,
Succeeded: 2,
@@ -452,7 +502,11 @@ func TestValidateJobUpdateStatus(t *testing.T) {
},
},
update: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "1",
},
Status: batch.JobStatus{
Active: 1,
Succeeded: 1,
@@ -460,18 +514,7 @@ func TestValidateJobUpdateStatus(t *testing.T) {
},
},
},
}
for _, successCase := range successCases {
successCase.old.ObjectMeta.ResourceVersion = "1"
successCase.update.ObjectMeta.ResourceVersion = "1"
if errs := ValidateJobUpdateStatus(&successCase.update, &successCase.old); len(errs) != 0 {
t.Errorf("expected success: %v", errs)
}
}
errorCases := map[string]testcase{
"[status.active: Invalid value: -1: must be greater than or equal to 0, status.succeeded: Invalid value: -2: must be greater than or equal to 0]": {
"negative counts": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
@@ -496,18 +539,48 @@ func TestValidateJobUpdateStatus(t *testing.T) {
Failed: 3,
},
},
wantErrs: field.ErrorList{
{Type: field.ErrorTypeInvalid, Field: "status.active"},
{Type: field.ErrorTypeInvalid, Field: "status.succeeded"},
},
},
"empty and duplicated uncounted pods": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "5",
},
},
update: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "5",
},
Status: batch.JobStatus{
UncountedTerminatedPods: &batch.UncountedTerminatedPods{
Succeeded: []types.UID{"a", "b", "c", "a", ""},
Failed: []types.UID{"c", "d", "e", "d", ""},
},
},
},
wantErrs: field.ErrorList{
{Type: field.ErrorTypeDuplicate, Field: "status.uncountedTerminatedPods.succeeded[3]"},
{Type: field.ErrorTypeInvalid, Field: "status.uncountedTerminatedPods.succeeded[4]"},
{Type: field.ErrorTypeDuplicate, Field: "status.uncountedTerminatedPods.failed[0]"},
{Type: field.ErrorTypeDuplicate, Field: "status.uncountedTerminatedPods.failed[3]"},
{Type: field.ErrorTypeInvalid, Field: "status.uncountedTerminatedPods.failed[4]"},
},
},
}
for testName, errorCase := range errorCases {
errs := ValidateJobUpdateStatus(&errorCase.update, &errorCase.old)
if len(errs) == 0 {
t.Errorf("expected failure: %s", testName)
continue
}
if errs.ToAggregate().Error() != testName {
t.Errorf("expected '%s' got '%s'", errs.ToAggregate().Error(), testName)
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
errs := ValidateJobUpdateStatus(&tc.update, &tc.old)
if diff := cmp.Diff(tc.wantErrs, errs, ignoreErrValueDetail); diff != "" {
t.Errorf("Unexpected errors (-want,+got):\n%s", diff)
}
})
}
}