Add Job.status.uncountedPodUIDs

For tracking Job Pods that have finished but are not yet counted as failed or succeeded

And feature gate JobTrackingWithFinalizers

Change-Id: I3e080f3ec090922640384b692e88eaf9a544d3b5
This commit is contained in:
Aldo Culquicondor
2021-01-18 13:36:03 -05:00
parent 657c6fe033
commit bb56a0bd04
25 changed files with 1183 additions and 192 deletions

View File

@@ -18,9 +18,18 @@ package batch
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
api "k8s.io/kubernetes/pkg/apis/core"
)
// JobTrackingFinalizer is a finalizer for Job's pods. It prevents them from
// being deleted before being accounted in the Job status.
// The apiserver and job controller use this string as a Job annotation, to
// mark Jobs that are being tracked using pod finalizers. Two releases after
// the JobTrackingWithFinalizers graduates to GA, JobTrackingFinalizer will
// no longer be used as a Job annotation.
const JobTrackingFinalizer = "batch.kubernetes.io/job-tracking"
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Job represents the configuration of a single job.
@@ -255,6 +264,38 @@ type JobStatus struct {
// represented as "1,3-5,7".
// +optional
CompletedIndexes string
// UncountedTerminatedPods holds the UIDs of Pods that have terminated but
// the job controller hasn't yet accounted for in the status counters.
//
// The job controller creates pods with a finalizer. When a pod terminates
// (succeeded or failed), the controller does three steps to account for it
// in the job status:
// (1) Add the pod UID to the corresponding array in this field.
// (2) Remove the pod finalizer.
// (3) Remove the pod UID from the array while increasing the corresponding
// counter.
//
// This field is alpha-level. The job controller only makes use of this field
// when the feature gate PodTrackingWithFinalizers is enabled.
// Old jobs might not be tracked using this field, in which case the field
// remains null.
// +optional
UncountedTerminatedPods *UncountedTerminatedPods
}
// UncountedTerminatedPods holds UIDs of Pods that have terminated but haven't
// been accounted in Job status counters.
type UncountedTerminatedPods struct {
// Succeeded holds UIDs of succeeded Pods.
// +listType=set
// +optional
Succeeded []types.UID
// Failed holds UIDs of failed Pods.
// +listType=set
// +optional
Failed []types.UID
}
// JobConditionType is a valid value for JobCondition.Type

View File

@@ -28,6 +28,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
conversion "k8s.io/apimachinery/pkg/conversion"
runtime "k8s.io/apimachinery/pkg/runtime"
types "k8s.io/apimachinery/pkg/types"
batch "k8s.io/kubernetes/pkg/apis/batch"
core "k8s.io/kubernetes/pkg/apis/core"
apiscorev1 "k8s.io/kubernetes/pkg/apis/core/v1"
@@ -130,6 +131,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.UncountedTerminatedPods)(nil), (*batch.UncountedTerminatedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_UncountedTerminatedPods_To_batch_UncountedTerminatedPods(a.(*v1.UncountedTerminatedPods), b.(*batch.UncountedTerminatedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*batch.UncountedTerminatedPods)(nil), (*v1.UncountedTerminatedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_batch_UncountedTerminatedPods_To_v1_UncountedTerminatedPods(a.(*batch.UncountedTerminatedPods), b.(*v1.UncountedTerminatedPods), scope)
}); err != nil {
return err
}
if err := s.AddConversionFunc((*batch.JobSpec)(nil), (*v1.JobSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_batch_JobSpec_To_v1_JobSpec(a.(*batch.JobSpec), b.(*v1.JobSpec), scope)
}); err != nil {
@@ -421,6 +432,7 @@ func autoConvert_v1_JobStatus_To_batch_JobStatus(in *v1.JobStatus, out *batch.Jo
out.Succeeded = in.Succeeded
out.Failed = in.Failed
out.CompletedIndexes = in.CompletedIndexes
out.UncountedTerminatedPods = (*batch.UncountedTerminatedPods)(unsafe.Pointer(in.UncountedTerminatedPods))
return nil
}
@@ -437,6 +449,7 @@ func autoConvert_batch_JobStatus_To_v1_JobStatus(in *batch.JobStatus, out *v1.Jo
out.Succeeded = in.Succeeded
out.Failed = in.Failed
out.CompletedIndexes = in.CompletedIndexes
out.UncountedTerminatedPods = (*v1.UncountedTerminatedPods)(unsafe.Pointer(in.UncountedTerminatedPods))
return nil
}
@@ -470,3 +483,25 @@ func autoConvert_batch_JobTemplateSpec_To_v1_JobTemplateSpec(in *batch.JobTempla
func Convert_batch_JobTemplateSpec_To_v1_JobTemplateSpec(in *batch.JobTemplateSpec, out *v1.JobTemplateSpec, s conversion.Scope) error {
return autoConvert_batch_JobTemplateSpec_To_v1_JobTemplateSpec(in, out, s)
}
func autoConvert_v1_UncountedTerminatedPods_To_batch_UncountedTerminatedPods(in *v1.UncountedTerminatedPods, out *batch.UncountedTerminatedPods, s conversion.Scope) error {
out.Succeeded = *(*[]types.UID)(unsafe.Pointer(&in.Succeeded))
out.Failed = *(*[]types.UID)(unsafe.Pointer(&in.Failed))
return nil
}
// Convert_v1_UncountedTerminatedPods_To_batch_UncountedTerminatedPods is an autogenerated conversion function.
func Convert_v1_UncountedTerminatedPods_To_batch_UncountedTerminatedPods(in *v1.UncountedTerminatedPods, out *batch.UncountedTerminatedPods, s conversion.Scope) error {
return autoConvert_v1_UncountedTerminatedPods_To_batch_UncountedTerminatedPods(in, out, s)
}
func autoConvert_batch_UncountedTerminatedPods_To_v1_UncountedTerminatedPods(in *batch.UncountedTerminatedPods, out *v1.UncountedTerminatedPods, s conversion.Scope) error {
out.Succeeded = *(*[]types.UID)(unsafe.Pointer(&in.Succeeded))
out.Failed = *(*[]types.UID)(unsafe.Pointer(&in.Failed))
return nil
}
// Convert_batch_UncountedTerminatedPods_To_v1_UncountedTerminatedPods is an autogenerated conversion function.
func Convert_batch_UncountedTerminatedPods_To_v1_UncountedTerminatedPods(in *batch.UncountedTerminatedPods, out *v1.UncountedTerminatedPods, s conversion.Scope) error {
return autoConvert_batch_UncountedTerminatedPods_To_v1_UncountedTerminatedPods(in, out, s)
}

View File

@@ -20,10 +20,10 @@ import (
"fmt"
"github.com/robfig/cron/v3"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
apimachineryvalidation "k8s.io/apimachinery/pkg/util/validation"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/kubernetes/pkg/apis/batch"
@@ -83,11 +83,14 @@ func ValidateGeneratedSelector(obj *batch.Job) field.ErrorList {
}
// ValidateJob validates a Job and returns an ErrorList with any errors.
func ValidateJob(job *batch.Job, opts apivalidation.PodValidationOptions) field.ErrorList {
func ValidateJob(job *batch.Job, opts JobValidationOptions) field.ErrorList {
// Jobs and rcs have the same name validation
allErrs := apivalidation.ValidateObjectMeta(&job.ObjectMeta, true, apivalidation.ValidateReplicationControllerName, field.NewPath("metadata"))
allErrs = append(allErrs, ValidateGeneratedSelector(job)...)
allErrs = append(allErrs, ValidateJobSpec(&job.Spec, field.NewPath("spec"), opts)...)
allErrs = append(allErrs, ValidateJobSpec(&job.Spec, field.NewPath("spec"), opts.PodValidationOptions)...)
if !opts.AllowTrackingAnnotation && hasJobTrackingAnnotation(job) {
allErrs = append(allErrs, field.Forbidden(field.NewPath("metadata").Child("annotations").Key(batch.JobTrackingFinalizer), "cannot add this annotation"))
}
if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion && job.Spec.Completions != nil && *job.Spec.Completions > 0 {
// For indexed job, the job controller appends a suffix (`-$INDEX`)
// to the pod hostname when indexed job create pods.
@@ -101,6 +104,14 @@ func ValidateJob(job *batch.Job, opts apivalidation.PodValidationOptions) field.
return allErrs
}
func hasJobTrackingAnnotation(job *batch.Job) bool {
if job.Annotations == nil {
return false
}
_, ok := job.Annotations[batch.JobTrackingFinalizer]
return ok
}
// ValidateJobSpec validates a JobSpec and returns an ErrorList with any errors.
func ValidateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidation.PodValidationOptions) field.ErrorList {
allErrs := validateJobSpec(spec, fldPath, opts)
@@ -169,12 +180,36 @@ func validateJobSpec(spec *batch.JobSpec, fldPath *field.Path, opts apivalidatio
return allErrs
}
// ValidateJobStatus validates a JobStatus and returns an ErrorList with any errors.
func ValidateJobStatus(status *batch.JobStatus, fldPath *field.Path) field.ErrorList {
// validateJobStatus validates a JobStatus and returns an ErrorList with any errors.
func validateJobStatus(status *batch.JobStatus, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.Active), fldPath.Child("active"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.Succeeded), fldPath.Child("succeeded"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.Failed), fldPath.Child("failed"))...)
if status.UncountedTerminatedPods != nil {
path := fldPath.Child("uncountedTerminatedPods")
seen := sets.NewString()
for i, k := range status.UncountedTerminatedPods.Succeeded {
p := path.Child("succeeded").Index(i)
if k == "" {
allErrs = append(allErrs, field.Invalid(p, k, "must not be empty"))
} else if seen.Has(string(k)) {
allErrs = append(allErrs, field.Duplicate(p, k))
} else {
seen.Insert(string(k))
}
}
for i, k := range status.UncountedTerminatedPods.Failed {
p := path.Child("failed").Index(i)
if k == "" {
allErrs = append(allErrs, field.Invalid(p, k, "must not be empty"))
} else if seen.Has(string(k)) {
allErrs = append(allErrs, field.Duplicate(p, k))
} else {
seen.Insert(string(k))
}
}
}
return allErrs
}
@@ -206,7 +241,7 @@ func ValidateJobSpecUpdate(spec, oldSpec batch.JobSpec, fldPath *field.Path, opt
// ValidateJobStatusUpdate validates an update to a JobStatus and returns an ErrorList with any errors.
func ValidateJobStatusUpdate(status, oldStatus batch.JobStatus) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, ValidateJobStatus(&status, field.NewPath("status"))...)
allErrs = append(allErrs, validateJobStatus(&status, field.NewPath("status"))...)
return allErrs
}
@@ -306,3 +341,9 @@ func ValidateJobTemplateSpec(spec *batch.JobTemplateSpec, fldPath *field.Path, o
}
return allErrs
}
type JobValidationOptions struct {
apivalidation.PodValidationOptions
// Allow Job to have the annotation batch.kubernetes.io/job-tracking
AllowTrackingAnnotation bool
}

View File

@@ -33,6 +33,8 @@ import (
"k8s.io/utils/pointer"
)
var ignoreErrValueDetail = cmpopts.IgnoreFields(field.Error{}, "BadValue", "Detail")
func getValidManualSelector() *metav1.LabelSelector {
return &metav1.LabelSelector{
MatchLabels: map[string]string{"a": "b"},
@@ -77,60 +79,91 @@ func TestValidateJob(t *testing.T) {
validGeneratedSelector := getValidGeneratedSelector()
validPodTemplateSpecForGenerated := getValidPodTemplateSpecForGenerated(validGeneratedSelector)
successCases := map[string]batch.Job{
successCases := map[string]struct {
opts JobValidationOptions
job batch.Job
}{
"valid manual selector": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validManualSelector,
ManualSelector: pointer.BoolPtr(true),
Template: validPodTemplateSpecForManual,
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
Annotations: map[string]string{"foo": "bar"},
},
Spec: batch.JobSpec{
Selector: validManualSelector,
ManualSelector: pointer.BoolPtr(true),
Template: validPodTemplateSpecForManual,
},
},
},
"valid generated selector": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
},
},
},
"valid NonIndexed completion mode": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.NonIndexedCompletion),
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.NonIndexedCompletion),
},
},
},
"valid Indexed completion mode": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.IndexedCompletion),
Completions: pointer.Int32Ptr(2),
Parallelism: pointer.Int32Ptr(100000),
},
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
CompletionMode: completionModePtr(batch.IndexedCompletion),
Completions: pointer.Int32Ptr(2),
Parallelism: pointer.Int32Ptr(100000),
},
"valid job tracking annotation": {
opts: JobValidationOptions{
AllowTrackingAnnotation: true,
},
job: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
Annotations: map[string]string{
batch.JobTrackingFinalizer: "",
},
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
},
},
},
}
for k, v := range successCases {
t.Run(k, func(t *testing.T) {
if errs := ValidateJob(&v, corevalidation.PodValidationOptions{}); len(errs) != 0 {
if errs := ValidateJob(&v.job, v.opts); len(errs) != 0 {
t.Errorf("Got unexpected validation errors: %v", errs)
}
})
@@ -306,11 +339,25 @@ func TestValidateJob(t *testing.T) {
Parallelism: pointer.Int32Ptr(100001),
},
},
"metadata.annotations[batch.kubernetes.io/job-tracking]: cannot add this annotation": {
ObjectMeta: metav1.ObjectMeta{
Name: "myjob",
Namespace: metav1.NamespaceDefault,
UID: types.UID("1a2b3c"),
Annotations: map[string]string{
batch.JobTrackingFinalizer: "",
},
},
Spec: batch.JobSpec{
Selector: validGeneratedSelector,
Template: validPodTemplateSpecForGenerated,
},
},
}
for k, v := range errorCases {
t.Run(k, func(t *testing.T) {
errs := ValidateJob(&v, corevalidation.PodValidationOptions{})
errs := ValidateJob(&v, JobValidationOptions{})
if len(errs) == 0 {
t.Errorf("expected failure for %s", k)
} else {
@@ -436,15 +483,18 @@ func TestValidateJobUpdate(t *testing.T) {
}
func TestValidateJobUpdateStatus(t *testing.T) {
type testcase struct {
old batch.Job
update batch.Job
}
successCases := []testcase{
{
cases := map[string]struct {
old batch.Job
update batch.Job
wantErrs field.ErrorList
}{
"valid": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "1",
},
Status: batch.JobStatus{
Active: 1,
Succeeded: 2,
@@ -452,7 +502,11 @@ func TestValidateJobUpdateStatus(t *testing.T) {
},
},
update: batch.Job{
ObjectMeta: metav1.ObjectMeta{Name: "abc", Namespace: metav1.NamespaceDefault},
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "1",
},
Status: batch.JobStatus{
Active: 1,
Succeeded: 1,
@@ -460,18 +514,7 @@ func TestValidateJobUpdateStatus(t *testing.T) {
},
},
},
}
for _, successCase := range successCases {
successCase.old.ObjectMeta.ResourceVersion = "1"
successCase.update.ObjectMeta.ResourceVersion = "1"
if errs := ValidateJobUpdateStatus(&successCase.update, &successCase.old); len(errs) != 0 {
t.Errorf("expected success: %v", errs)
}
}
errorCases := map[string]testcase{
"[status.active: Invalid value: -1: must be greater than or equal to 0, status.succeeded: Invalid value: -2: must be greater than or equal to 0]": {
"negative counts": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
@@ -496,18 +539,48 @@ func TestValidateJobUpdateStatus(t *testing.T) {
Failed: 3,
},
},
wantErrs: field.ErrorList{
{Type: field.ErrorTypeInvalid, Field: "status.active"},
{Type: field.ErrorTypeInvalid, Field: "status.succeeded"},
},
},
"empty and duplicated uncounted pods": {
old: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "5",
},
},
update: batch.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "abc",
Namespace: metav1.NamespaceDefault,
ResourceVersion: "5",
},
Status: batch.JobStatus{
UncountedTerminatedPods: &batch.UncountedTerminatedPods{
Succeeded: []types.UID{"a", "b", "c", "a", ""},
Failed: []types.UID{"c", "d", "e", "d", ""},
},
},
},
wantErrs: field.ErrorList{
{Type: field.ErrorTypeDuplicate, Field: "status.uncountedTerminatedPods.succeeded[3]"},
{Type: field.ErrorTypeInvalid, Field: "status.uncountedTerminatedPods.succeeded[4]"},
{Type: field.ErrorTypeDuplicate, Field: "status.uncountedTerminatedPods.failed[0]"},
{Type: field.ErrorTypeDuplicate, Field: "status.uncountedTerminatedPods.failed[3]"},
{Type: field.ErrorTypeInvalid, Field: "status.uncountedTerminatedPods.failed[4]"},
},
},
}
for testName, errorCase := range errorCases {
errs := ValidateJobUpdateStatus(&errorCase.update, &errorCase.old)
if len(errs) == 0 {
t.Errorf("expected failure: %s", testName)
continue
}
if errs.ToAggregate().Error() != testName {
t.Errorf("expected '%s' got '%s'", errs.ToAggregate().Error(), testName)
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
errs := ValidateJobUpdateStatus(&tc.update, &tc.old)
if diff := cmp.Diff(tc.wantErrs, errs, ignoreErrValueDetail); diff != "" {
t.Errorf("Unexpected errors (-want,+got):\n%s", diff)
}
})
}
}

View File

@@ -23,6 +23,7 @@ package batch
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
types "k8s.io/apimachinery/pkg/types"
core "k8s.io/kubernetes/pkg/apis/core"
)
@@ -312,6 +313,11 @@ func (in *JobStatus) DeepCopyInto(out *JobStatus) {
in, out := &in.CompletionTime, &out.CompletionTime
*out = (*in).DeepCopy()
}
if in.UncountedTerminatedPods != nil {
in, out := &in.UncountedTerminatedPods, &out.UncountedTerminatedPods
*out = new(UncountedTerminatedPods)
(*in).DeepCopyInto(*out)
}
return
}
@@ -369,3 +375,29 @@ func (in *JobTemplateSpec) DeepCopy() *JobTemplateSpec {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UncountedTerminatedPods) DeepCopyInto(out *UncountedTerminatedPods) {
*out = *in
if in.Succeeded != nil {
in, out := &in.Succeeded, &out.Succeeded
*out = make([]types.UID, len(*in))
copy(*out, *in)
}
if in.Failed != nil {
in, out := &in.Failed, &out.Failed
*out = make([]types.UID, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UncountedTerminatedPods.
func (in *UncountedTerminatedPods) DeepCopy() *UncountedTerminatedPods {
if in == nil {
return nil
}
out := new(UncountedTerminatedPods)
in.DeepCopyInto(out)
return out
}