test: Add E2E for job completions with cpu reservation
Add an E2E test that creates a Job whose pods are all expected to succeed. The Job reserves a fixed amount of CPU per pod and uses a large number of completions and parallelism. Used to reproduce github.com/kubernetes/kubernetes/issues/106884

Signed-off-by: David Porter <david@porter.me>
parent ca98714ec0
commit c70f1955c4
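For context, a rough, self-contained sketch of the kind of Job the new test builds, assuming the standard k8s.io/api and k8s.io/utils types; the 90/90 parallelism/completions, zero backoff, node selector, and per-pod CPU request mirror the test added below, while the helper name, container image, and the concrete "200m" request are illustrative placeholders only.

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/pointer"
)

// buildReproJob sketches the shape of the Job created by the E2E test: many
// completions running in parallel, every pod pinned to one node and reserving
// a fixed slice of that node's CPU. Illustrative only, not the test's code.
func buildReproJob(nodeName string, cpuPerPod resource.Quantity) *batchv1.Job {
	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{Name: "all-succeed"},
		Spec: batchv1.JobSpec{
			Parallelism:  pointer.Int32Ptr(90),
			Completions:  pointer.Int32Ptr(90),
			BackoffLimit: pointer.Int32Ptr(0),
			Template: v1.PodTemplateSpec{
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					NodeSelector:  map[string]string{"kubernetes.io/hostname": nodeName},
					Containers: []v1.Container{{
						Name:    "c",
						Image:   "busybox",
						Command: []string{"sh", "-c", "exit 0"},
						Resources: v1.ResourceRequirements{
							Requests: v1.ResourceList{v1.ResourceCPU: cpuPerPod},
						},
					}},
				},
			},
		},
	}
}

func main() {
	// The test derives the request from the node's allocatable CPU (roughly 20%);
	// "200m" here is just a placeholder value.
	job := buildReproJob("node-1", resource.MustParse("200m"))
	fmt.Printf("job %s: %d completions, %d in parallel\n",
		job.Name, *job.Spec.Completions, *job.Spec.Parallelism)
}

Because each pod requests about a fifth of the node's allocatable CPU and up to 90 of them are pinned to that single node, the node's CPU accounting stays under pressure as pods finish and replacements are scheduled, which is the kind of churn the linked issue concerns.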
@@ -326,6 +326,14 @@ func findContainerStatus(status *v1.PodStatus, containerID string) (containerSta
 }
 
+// TerminatePod ensures that the status of containers is properly defaulted at the end of the pod
+// lifecycle. As the Kubelet must reconcile with the container runtime to observe container status
+// there is always the possibility we are unable to retrieve one or more container statuses due to
+// garbage collection, admin action, or loss of temporary data on a restart. This method ensures
+// that any absent container status is treated as a failure so that we do not incorrectly describe
+// the pod as successful. If we have not yet initialized the pod in the presence of init containers,
+// the init container failure status is sufficient to describe the pod as failing, and we do not need
+// to override waiting containers (unless there is evidence the pod previously started those containers).
 func (m *manager) TerminatePod(pod *v1.Pod) {
 	m.podStatusesLock.Lock()
 	defer m.podStatusesLock.Unlock()
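The comment added above states the rule, but this hunk does not show TerminatePod's body. As a rough, self-contained illustration of the rule alone, assuming only k8s.io/api/core/v1 types (the function, reason string, and exit code below are placeholders, not the kubelet's actual implementation):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// defaultTerminatedStatuses illustrates the documented rule: once a pod reaches
// the end of its lifecycle, any container whose status is missing or never
// reached a terminal state is recorded as failed, so a pod is never reported
// successful on the strength of statuses that were not observed.
// Illustrative sketch only, not the kubelet's code.
func defaultTerminatedStatuses(pod *v1.Pod, status *v1.PodStatus) {
	failedState := func() *v1.ContainerStateTerminated {
		return &v1.ContainerStateTerminated{
			Reason:   "ContainerStatusUnknown",
			Message:  "container status was unavailable when the pod terminated",
			ExitCode: 137,
		}
	}
	for _, c := range pod.Spec.Containers {
		found := false
		for i := range status.ContainerStatuses {
			cs := &status.ContainerStatuses[i]
			if cs.Name != c.Name {
				continue
			}
			found = true
			if cs.State.Terminated == nil {
				// A status exists but never became terminal: default it to failed.
				cs.State = v1.ContainerState{Terminated: failedState()}
			}
		}
		if !found {
			// No status at all for this container: treat the absence as a failure.
			status.ContainerStatuses = append(status.ContainerStatuses, v1.ContainerStatus{
				Name:  c.Name,
				State: v1.ContainerState{Terminated: failedState()},
			})
		}
	}
}

func main() {
	pod := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{Name: "app"}}}}
	status := &v1.PodStatus{}
	defaultTerminatedStatuses(pod, status)
	fmt.Println(status.ContainerStatuses[0].State.Terminated.Reason)
}

The effect is simply that a pod can only be reported Succeeded when every one of its containers was actually observed to terminate successfully.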
@@ -25,6 +25,7 @@ import (
 	batchv1 "k8s.io/api/batch/v1"
 	v1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -35,6 +36,7 @@ import (
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eresource "k8s.io/kubernetes/test/e2e/framework/resource"
+	"k8s.io/kubernetes/test/e2e/scheduling"
 	"k8s.io/utils/pointer"
 
 	"github.com/onsi/ginkgo"
@@ -45,6 +47,10 @@ var _ = SIGDescribe("Job", func() {
 	f := framework.NewDefaultFramework("job")
 	parallelism := int32(2)
 	completions := int32(4)
+
+	largeParallelism := int32(90)
+	largeCompletions := int32(90)
+
 	backoffLimit := int32(6) // default value
 
 	// Simplest case: N pods succeed
@@ -361,6 +367,52 @@ var _ = SIGDescribe("Job", func() {
 			framework.ExpectEqual(pod.Status.Phase, v1.PodFailed)
 		}
 	})
+
+	ginkgo.It("should run a job to completion with CPU requests [Serial]", func() {
+		ginkgo.By("Creating a job with CPU requests")
+
+		testNodeName := scheduling.GetNodeThatCanRunPod(f)
+		targetNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{})
+		framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName)
+
+		cpu, ok := targetNode.Status.Allocatable[v1.ResourceCPU]
+		if !ok {
+			framework.Failf("Unable to get node's %q cpu", targetNode.Name)
+		}
+
+		cpuRequest := fmt.Sprint(int64(0.2 * float64(cpu.Value())))
+
+		backoff := 0
+		ginkgo.By("Creating a job")
+		job := e2ejob.NewTestJob("succeed", "all-succeed", v1.RestartPolicyNever, largeParallelism, largeCompletions, nil, int32(backoff))
+		for i := range job.Spec.Template.Spec.Containers {
+			job.Spec.Template.Spec.Containers[i].Resources = v1.ResourceRequirements{
+				Requests: v1.ResourceList{
+					v1.ResourceCPU: resource.MustParse(cpuRequest),
+				},
+			}
+			job.Spec.Template.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": testNodeName}
+		}
+
+		framework.Logf("Creating job %q with a node hostname selector %q with cpu request %q", job.Name, testNodeName, cpuRequest)
+		job, err = e2ejob.CreateJob(f.ClientSet, f.Namespace.Name, job)
+		framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
+
+		ginkgo.By("Ensuring job reaches completions")
+		err = e2ejob.WaitForJobComplete(f.ClientSet, f.Namespace.Name, job.Name, largeCompletions)
+		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
+
+		ginkgo.By("Ensuring pods for job exist")
+		pods, err := e2ejob.GetJobPods(f.ClientSet, f.Namespace.Name, job.Name)
+		framework.ExpectNoError(err, "failed to get pod list for job in namespace: %s", f.Namespace.Name)
+		successes := int32(0)
+		for _, pod := range pods.Items {
+			if pod.Status.Phase == v1.PodSucceeded {
+				successes++
+			}
+		}
+		framework.ExpectEqual(successes, largeCompletions, "expected %d successful job pods, but got %d", largeCompletions, successes)
+	})
 })
 
 // waitForJobFailure uses c to wait for up to timeout for the Job named jobName in namespace ns to fail.