Merge pull request #129951 from parkjeongryul/add-e2e-topology-manager-for-init-ctn
Add e2e test for topology manager with restartable init containers
Commit 88d2355c41
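For context — the hunks below appear to come from the node e2e topology manager tests (likely test/e2e_node/topology_manager_test.go; the file path is not shown in this view). A restartable init container is an init container whose restartPolicy is set to Always: it behaves as a sidecar that keeps running alongside the app containers instead of running to completion before they start. The new tests build Guaranteed-QoS pods roughly shaped like the sketch below; this is an illustrative example, not code from the commit, and the image and names are placeholders.

    package main

    import (
    	"fmt"

    	v1 "k8s.io/api/core/v1"
    	"k8s.io/apimachinery/pkg/api/resource"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    func main() {
    	always := v1.ContainerRestartPolicyAlways

    	// Guaranteed QoS: requests equal limits for CPU and memory in every container.
    	guaranteed := v1.ResourceRequirements{
    		Requests: v1.ResourceList{
    			v1.ResourceCPU:    resource.MustParse("1000m"),
    			v1.ResourceMemory: resource.MustParse("100Mi"),
    		},
    		Limits: v1.ResourceList{
    			v1.ResourceCPU:    resource.MustParse("1000m"),
    			v1.ResourceMemory: resource.MustParse("100Mi"),
    		},
    	}

    	pod := &v1.Pod{
    		ObjectMeta: metav1.ObjectMeta{Name: "gu-pod-with-sidecar"},
    		Spec: v1.PodSpec{
    			RestartPolicy: v1.RestartPolicyNever,
    			InitContainers: []v1.Container{{
    				Name:          "restartable-init-container",
    				Image:         "busybox",
    				Command:       []string{"sh", "-c", "sleep 1d"},
    				RestartPolicy: &always, // this is what makes the init container a sidecar
    				Resources:     guaranteed,
    			}},
    			Containers: []v1.Container{{
    				Name:      "gu-container",
    				Image:     "busybox",
    				Command:   []string{"sh", "-c", "sleep 1d"},
    				Resources: guaranteed,
    			}},
    		},
    	}
    	fmt.Println(pod.Name)
    }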
@@ -69,6 +69,7 @@ type tmCtnAttribute struct {
 	deviceName    string
 	deviceRequest string
 	deviceLimit   string
+	restartPolicy *v1.ContainerRestartPolicy
 }
 
 func detectNUMANodes() int {
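The new restartPolicy field is a pointer, so the test attributes later in this diff pass the address of a shared value (&containerRestartPolicyAlways). That declaration is not part of any hunk shown here; a minimal sketch of what it presumably looks like:

    package topologymanagertest // hypothetical package name for this sketch

    import v1 "k8s.io/api/core/v1"

    // Package-level value whose address can be shared by many tmCtnAttribute
    // literals via &containerRestartPolicyAlways. Sketch only; the actual
    // declaration lives elsewhere in the test file.
    var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways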
@@ -159,6 +160,7 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 			},
 		},
 		Command: []string{"sh", "-c", ctnCmd},
+		RestartPolicy: ctnAttr.restartPolicy,
 	}
 	if ctnAttr.deviceName != "" {
 		ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
@@ -171,8 +173,12 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 
 func makeTopologyManagerTestPod(podName string, tmCtnAttributes, tmInitCtnAttributes []tmCtnAttribute) *v1.Pod {
 	var containers, initContainers []v1.Container
-	if len(tmInitCtnAttributes) > 0 {
-		initContainers = makeContainers(numaAlignmentCommand, tmInitCtnAttributes)
+	for _, attr := range tmInitCtnAttributes {
+		cmd := numaAlignmentCommand
+		if attr.restartPolicy != nil && *attr.restartPolicy == v1.ContainerRestartPolicyAlways {
+			cmd = numaAlignmentSleepCommand
+		}
+		initContainers = append(initContainers, makeContainers(cmd, []tmCtnAttribute{attr})...)
 	}
 	containers = makeContainers(numaAlignmentSleepCommand, tmCtnAttributes)
 
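The loop above gives each init container its own command: regular init containers keep the one-shot numaAlignmentCommand (they must run to completion before the pod can start), while restartable init containers get numaAlignmentSleepCommand so they keep running alongside the app containers (a one-shot command would make the sidecar exit and restart in a loop). A hedged usage sketch of the updated helper; it assumes it sits inside the same test file, since it reuses unexported identifiers from this diff, and the attribute values are illustrative:

    // Assumes tmCtnAttribute, makeTopologyManagerTestPod and
    // containerRestartPolicyAlways from the surrounding test file.
    initCtnAttrs := []tmCtnAttribute{
    	{
    		// regular init container: gets numaAlignmentCommand, runs once and exits
    		ctnName:    "init-container",
    		cpuRequest: "1000m",
    		cpuLimit:   "1000m",
    	},
    	{
    		// restartable init container: gets numaAlignmentSleepCommand and keeps running
    		ctnName:       "restartable-init-container",
    		cpuRequest:    "1000m",
    		cpuLimit:      "1000m",
    		restartPolicy: &containerRestartPolicyAlways,
    	},
    }
    ctnAttrs := []tmCtnAttribute{
    	{ctnName: "gu-container", cpuRequest: "1000m", cpuLimit: "1000m"},
    }
    pod := makeTopologyManagerTestPod("gu-pod", ctnAttrs, initCtnAttrs)
    _ = pod // handed to the kubelet by the test runner in the real suite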
@@ -346,6 +352,25 @@ func findSRIOVResource(node *v1.Node) (string, int64) {
 }
 
 func validatePodAlignment(ctx context.Context, f *framework.Framework, pod *v1.Pod, envInfo *testEnvInfo) {
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		ginkgo.By(fmt.Sprintf("validating the init container %s on Gu pod %s", cnt.Name, pod.Name))
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+		framework.Logf("got init container logs: %v", logs)
+		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
+		framework.ExpectNoError(err, "NUMA Alignment check failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		if numaRes != nil {
+			framework.Logf("NUMA resources for init container %s/%s: %s", pod.Name, cnt.Name, numaRes.String())
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
 
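Both new validation loops use the same guard to decide which init containers to inspect. Factored out as a tiny standalone predicate (hypothetical helper name; the test inlines the condition), it looks like this:

    package main

    import (
    	"fmt"

    	v1 "k8s.io/api/core/v1"
    )

    // isRestartableInitContainer captures the filter used by the new validation
    // loops: an init container counts as restartable (sidecar) only when its
    // restartPolicy is explicitly Always.
    func isRestartableInitContainer(cnt *v1.Container) bool {
    	return cnt.RestartPolicy != nil && *cnt.RestartPolicy == v1.ContainerRestartPolicyAlways
    }

    func main() {
    	always := v1.ContainerRestartPolicyAlways
    	fmt.Println(isRestartableInitContainer(&v1.Container{}))                       // false: plain init container is skipped
    	fmt.Println(isRestartableInitContainer(&v1.Container{RestartPolicy: &always})) // true: sidecar is validated
    }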
@@ -367,6 +392,23 @@ func validatePodAlignmentWithPodScope(ctx context.Context, f *framework.Framewor
 	podsNUMA := make(map[int]int)
 
 	ginkgo.By(fmt.Sprintf("validate pod scope alignment for %s pod", pod.Name))
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		envMap, err := makeEnvMap(logs)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		cpuToNUMA, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, envMap, envInfo.numaNodes)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		for cpuID, numaID := range cpuToNUMA {
+			podsNUMA[cpuID] = numaID
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
 		framework.ExpectNoError(err, "NUMA alignment failed for container [%s] of pod [%s]", cnt.Name, pod.Name)
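In the pod-scope variant the restartable init containers' CPU-to-NUMA assignments are merged into podsNUMA together with the app containers', so the final single-NUMA-node assertion presumably covers sidecars as well. A minimal sketch of that kind of check over the merged map (not the literal assertion from the test file):

    package main

    import "fmt"

    // singleNUMANode reports whether every CPU in cpuToNUMA maps to the same
    // NUMA node. This mirrors what a pod-scope alignment check ultimately
    // asserts once sidecar CPUs have been merged into the same map.
    func singleNUMANode(cpuToNUMA map[int]int) bool {
    	nodes := map[int]struct{}{}
    	for _, numaID := range cpuToNUMA {
    		nodes[numaID] = struct{}{}
    	}
    	return len(nodes) <= 1
    }

    func main() {
    	fmt.Println(singleNUMANode(map[int]int{0: 0, 1: 0})) // true: aligned
    	fmt.Println(singleNUMANode(map[int]int{0: 0, 1: 1})) // false: spread across two nodes
    }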
@@ -440,7 +482,7 @@ func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework,
 	}
 
 	// per https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/693-topology-manager/README.md#multi-numa-systems-tests
-	// we can do a menaingful validation only when using the single-numa node policy
+	// we can do a meaningful validation only when using the single-numa node policy
 	if envInfo.policy == topologymanager.PolicySingleNumaNode {
 		for _, pod := range podMap {
 			validatePodAlignment(ctx, f, pod, envInfo)
@@ -733,6 +775,94 @@ func runTMScopeResourceAlignmentTestSuite(ctx context.Context, f *framework.Fram
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
 
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with restartable init container, 1 core and 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with multiple restartable init containers, each container with 1 CPU core. Use 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	coresReq = fmt.Sprintf("%dm", (numCores/2+1)*1000)
+	ginkgo.By(fmt.Sprintf("Trying to admit guaranteed pod with two restartable init containers where sum of their CPU requests (%d cores) exceeds NUMA capacity. The request should be rejected", (numCores/2+1)*2))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
 	teardownSRIOVConfigOrFail(ctx, f, sd)
 }
 
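The rejection case at the end of this hunk rests on simple arithmetic: each restartable init container asks for numCores/2 + 1 exclusive cores, and because sidecars run concurrently with the app containers under the pod-scope topology manager, their requests add up and can no longer fit on one NUMA node under the single-numa-node policy. A small worked example, assuming for illustration a NUMA node with numCores = 8 (the real value is detected from the test environment at runtime):

    package main

    import "fmt"

    func main() {
    	// Illustrative only: pretend each NUMA node exposes 8 allocatable cores.
    	numCores := 8

    	// Per-container request built exactly like the test does.
    	coresReq := fmt.Sprintf("%dm", (numCores/2+1)*1000)

    	// Two restartable init containers with that request run concurrently,
    	// so their demands sum up under pod-scope alignment.
    	totalCores := (numCores/2 + 1) * 2

    	fmt.Println(coresReq)   // "5000m" for each restartable init container
    	fmt.Println(totalCores) // 10 cores combined > 8 cores per NUMA node, so admission fails
    }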
@@ -825,6 +955,30 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
 
+	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with restartable init container - each with 1 core, 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
 	// testing more complex conditions require knowledge about the system cpu+bus topology
 }
 
@@ -889,6 +1043,39 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 		},
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Successfully admit pod with multiple restartable init containers, each with 1 core, 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
 }
 
 // this is the only policy that can guarantee reliable rejects
@@ -908,6 +1095,65 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 		},
 	}
 	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+	if sd.resourceAmount >= 3 {
+		ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with a restartable init container demanding %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container",
+				cpuRequest:    excessCoresReq,
+				cpuLimit:      excessCoresReq,
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+		ginkgo.By("Trying to admit a guaranteed pod with two restartable init containers where the second one cannot achieve NUMA alignment - and it should be rejected")
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container-1",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+			{
+				ctnName:       "restartable-init-container-2",
+				cpuRequest:    excessCoresReq,
+				cpuLimit:      excessCoresReq,
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+	}
 }
 }
 