Merge pull request #129951 from parkjeongryul/add-e2e-topology-manager-for-init-ctn
Add e2e test for topology manager with restartable init containers
Commit 88d2355c41
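For context, here is a minimal sketch (not part of this PR) of the feature under test: an init container whose `RestartPolicy` is `Always` is a "restartable" (sidecar) init container that keeps running alongside the regular containers, so the topology manager must align its resources too. Names like `sidecar-demo` are illustrative only.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	always := v1.ContainerRestartPolicyAlways
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "sidecar-demo"},
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{{
				Name:          "restartable-init",
				Image:         "busybox",
				RestartPolicy: &always, // restartable (sidecar) init container
			}},
			Containers: []v1.Container{{
				Name:  "app",
				Image: "busybox",
			}},
		},
	}
	fmt.Println(pod.Name, *pod.Spec.InitContainers[0].RestartPolicy)
}
```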
@@ -69,6 +69,7 @@ type tmCtnAttribute struct {
 	deviceName    string
 	deviceRequest string
 	deviceLimit   string
+	restartPolicy *v1.ContainerRestartPolicy
 }
 
 func detectNUMANodes() int {
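A hypothetical standalone sketch of how the new `restartPolicy` field gets populated; `tmCtnAttribute` is redefined here just for illustration, and a nil pointer stands for a regular init container.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

type tmCtnAttribute struct {
	ctnName       string
	cpuRequest    string
	cpuLimit      string
	deviceName    string
	deviceRequest string
	deviceLimit   string
	restartPolicy *v1.ContainerRestartPolicy
}

func main() {
	always := v1.ContainerRestartPolicyAlways
	attr := tmCtnAttribute{
		ctnName:       "restartable-init-container",
		cpuRequest:    "1000m",
		cpuLimit:      "1000m",
		restartPolicy: &always, // nil would mean a regular init container
	}
	restartable := attr.restartPolicy != nil && *attr.restartPolicy == v1.ContainerRestartPolicyAlways
	fmt.Printf("%s restartable=%v\n", attr.ctnName, restartable)
}
```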
@@ -158,7 +159,8 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 				v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
 			},
 		},
-		Command: []string{"sh", "-c", ctnCmd},
+		Command:       []string{"sh", "-c", ctnCmd},
+		RestartPolicy: ctnAttr.restartPolicy,
 	}
 	if ctnAttr.deviceName != "" {
 		ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
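A minimal sketch (an assumed shape, not the test's full `makeContainers`) of how the new `RestartPolicy` field flows from the attribute into the built container; leaving the pointer nil keeps the API default, i.e. a regular init container.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func makeContainer(name, cmd string, restartPolicy *v1.ContainerRestartPolicy) v1.Container {
	return v1.Container{
		Name:  name,
		Image: "busybox",
		Resources: v1.ResourceRequirements{
			Requests: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("1000m"),
				v1.ResourceMemory: resource.MustParse("100Mi"),
			},
			Limits: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("1000m"),
				v1.ResourceMemory: resource.MustParse("100Mi"),
			},
		},
		Command:       []string{"sh", "-c", cmd},
		RestartPolicy: restartPolicy, // nil => regular init container
	}
}

func main() {
	always := v1.ContainerRestartPolicyAlways
	ctn := makeContainer("restartable-init", "sleep infinity", &always)
	fmt.Println(ctn.Name, *ctn.RestartPolicy)
}
```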
@@ -171,8 +173,12 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 
 func makeTopologyManagerTestPod(podName string, tmCtnAttributes, tmInitCtnAttributes []tmCtnAttribute) *v1.Pod {
 	var containers, initContainers []v1.Container
-	if len(tmInitCtnAttributes) > 0 {
-		initContainers = makeContainers(numaAlignmentCommand, tmInitCtnAttributes)
-	}
+	for _, attr := range tmInitCtnAttributes {
+		cmd := numaAlignmentCommand
+		if attr.restartPolicy != nil && *attr.restartPolicy == v1.ContainerRestartPolicyAlways {
+			cmd = numaAlignmentSleepCommand
+		}
+		initContainers = append(initContainers, makeContainers(cmd, []tmCtnAttribute{attr})...)
+	}
 	containers = makeContainers(numaAlignmentSleepCommand, tmCtnAttributes)
 
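A sketch of the command-selection rule the new loop implements: a restartable init container never exits on its own, so it gets the long-running (sleep) variant of the alignment command, while a regular init container keeps the one-shot variant. The command strings below are placeholders, not the test's actual constants.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

const (
	numaAlignmentCommand      = "env && sleep 1"        // placeholder one-shot command
	numaAlignmentSleepCommand = "env && sleep infinity" // placeholder long-running command
)

// commandFor mirrors the loop's rule: only Always-restart init containers
// get the long-running command.
func commandFor(restartPolicy *v1.ContainerRestartPolicy) string {
	if restartPolicy != nil && *restartPolicy == v1.ContainerRestartPolicyAlways {
		return numaAlignmentSleepCommand
	}
	return numaAlignmentCommand
}

func main() {
	always := v1.ContainerRestartPolicyAlways
	fmt.Println(commandFor(nil))     // regular init container
	fmt.Println(commandFor(&always)) // restartable init container
}
```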
@@ -346,6 +352,25 @@ func findSRIOVResource(node *v1.Node) (string, int64) {
 }
 
 func validatePodAlignment(ctx context.Context, f *framework.Framework, pod *v1.Pod, envInfo *testEnvInfo) {
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		ginkgo.By(fmt.Sprintf("validating the init container %s on Gu pod %s", cnt.Name, pod.Name))
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+		framework.Logf("got init container logs: %v", logs)
+		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
+		framework.ExpectNoError(err, "NUMA Alignment check failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		if numaRes != nil {
+			framework.Logf("NUMA resources for init container %s/%s: %s", pod.Name, cnt.Name, numaRes.String())
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
 
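A sketch of the filter this validation loop applies: only restartable init containers are still running once the pod is up, so only their logs can be fetched and checked, and regular init containers are skipped. The helper name is hypothetical.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// restartableInitContainers keeps only init containers whose RestartPolicy
// is Always, matching the skip condition in the diff above.
func restartableInitContainers(spec v1.PodSpec) []v1.Container {
	var out []v1.Container
	for _, cnt := range spec.InitContainers {
		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
			continue // regular init container: already exited, skip it
		}
		out = append(out, cnt)
	}
	return out
}

func main() {
	always := v1.ContainerRestartPolicyAlways
	spec := v1.PodSpec{InitContainers: []v1.Container{
		{Name: "regular-init"},
		{Name: "sidecar-init", RestartPolicy: &always},
	}}
	for _, c := range restartableInitContainers(spec) {
		fmt.Println("would validate:", c.Name) // prints only sidecar-init
	}
}
```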
@@ -367,6 +392,23 @@ func validatePodAlignmentWithPodScope(ctx context.Context, f *framework.Framewor
 	podsNUMA := make(map[int]int)
 
 	ginkgo.By(fmt.Sprintf("validate pod scope alignment for %s pod", pod.Name))
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		envMap, err := makeEnvMap(logs)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		cpuToNUMA, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, envMap, envInfo.numaNodes)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		for cpuID, numaID := range cpuToNUMA {
+			podsNUMA[cpuID] = numaID
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
 		framework.ExpectNoError(err, "NUMA alignment failed for container [%s] of pod [%s]", cnt.Name, pod.Name)
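A sketch of the pod-scope invariant this code feeds into: every CPU assigned to any container of the pod, restartable init or regular, must land on the same NUMA node. The map merge mirrors `podsNUMA[cpuID] = numaID` above; the function name is hypothetical.

```go
package main

import "fmt"

// podScopeAligned merges per-container cpuID -> numaID maps and reports
// whether all assigned CPUs share one NUMA node.
func podScopeAligned(perContainer []map[int]int) bool {
	podsNUMA := make(map[int]int) // cpuID -> numaID, merged across containers
	for _, cpuToNUMA := range perContainer {
		for cpuID, numaID := range cpuToNUMA {
			podsNUMA[cpuID] = numaID
		}
	}
	seen := -1
	for _, numaID := range podsNUMA {
		if seen == -1 {
			seen = numaID
		} else if numaID != seen {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(podScopeAligned([]map[int]int{{0: 0, 1: 0}, {2: 0}})) // true
	fmt.Println(podScopeAligned([]map[int]int{{0: 0}, {4: 1}}))       // false
}
```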
@@ -440,7 +482,7 @@ func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework,
 	}
 
 	// per https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/693-topology-manager/README.md#multi-numa-systems-tests
-	// we can do a menaingful validation only when using the single-numa node policy
+	// we can do a meaningful validation only when using the single-numa node policy
 	if envInfo.policy == topologymanager.PolicySingleNumaNode {
 		for _, pod := range podMap {
 			validatePodAlignment(ctx, f, pod, envInfo)
@@ -733,6 +775,94 @@ func runTMScopeResourceAlignmentTestSuite(ctx context.Context, f *framework.Fram
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with restartable init container, 1 core and 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with multiple restartable init containers, each container with 1 CPU core. Use 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	coresReq = fmt.Sprintf("%dm", (numCores/2+1)*1000)
+	ginkgo.By(fmt.Sprintf("Trying to admit guaranteed pod with two restartable init containers where sum of their CPU requests (%d cores) exceeds NUMA capacity. The request should be rejected", (numCores/2+1)*2))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
 
 	teardownSRIOVConfigOrFail(ctx, f, sd)
 }
 
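A sketch of the arithmetic behind that negative case: each restartable init container requests numCores/2+1 exclusive cores, and since restartable init containers run concurrently and keep their allocations, two of them need (numCores/2+1)*2 > numCores cores at once, which can never fit. The core count below is an assumption for illustration.

```go
package main

import "fmt"

func main() {
	numCores := 8 // assumed CPU count available to the pod scope, for illustration
	perCtn := numCores/2 + 1
	coresReq := fmt.Sprintf("%dm", perCtn*1000)
	fmt.Printf("each init container asks for %s; combined %d cores > %d available\n",
		coresReq, perCtn*2, numCores)
}
```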
@@ -825,6 +955,30 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with restartable init container - each with 1 core, 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
 
 	// testing more complex conditions require knowledge about the system cpu+bus topology
 }
 
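For readers new to this area, a rough, simplified sketch of the single-numa-node idea these "1 core, 1 device" cases exercise (not kubelet's actual hint-provider code): each resource proposes NUMA affinity bitmasks, the manager intersects them, and admission only succeeds if some single-node mask survives.

```go
package main

import "fmt"

// intersectHints ANDs one affinity mask per resource and reports whether the
// result pins to exactly one NUMA node (exactly one bit set).
func intersectHints(masks []uint) (uint, bool) {
	merged := ^uint(0)
	for _, m := range masks {
		merged &= m
	}
	single := merged != 0 && merged&(merged-1) == 0
	return merged, single
}

func main() {
	// CPU hint: node 0; device hint: node 0 -> aligned on a single node.
	fmt.Println(intersectHints([]uint{0b01, 0b01})) // 1 true
	// CPU hint: node 1; device hint: node 0 -> no single node survives.
	fmt.Println(intersectHints([]uint{0b10, 0b01})) // 0 false
}
```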
@@ -889,6 +1043,39 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 			},
 		}
 		runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+		ginkgo.By(fmt.Sprintf("Successfully admit pod with multiple restartable init containers, each with 1 core, 1 %s device", sd.resourceName))
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container-1",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+			{
+				ctnName:       "restartable-init-container-2",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
 	}
 
 	// this is the only policy that can guarantee reliable rejects
@@ -908,6 +1095,65 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 			},
 		}
 		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+		if sd.resourceAmount >= 3 {
+			ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with a restartable init container demanding %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
+			initCtnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "restartable-init-container",
+					cpuRequest:    excessCoresReq,
+					cpuLimit:      excessCoresReq,
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+			}
+			ctnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "gu-container",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+				},
+			}
+			runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+			ginkgo.By("Trying to admit a guaranteed pod with two restartable init containers where the second one cannot achieve NUMA alignment - and it should be rejected")
+			initCtnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "restartable-init-container-1",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+				{
+					ctnName:       "restartable-init-container-2",
+					cpuRequest:    excessCoresReq,
+					cpuLimit:      excessCoresReq,
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+			}
+			ctnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "gu-container",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+				},
+			}
+			runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+		}
 	}
 }
 
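A hedged sketch (names hypothetical, semantics simplified) of the accounting rule these negative cases lean on: restartable init containers keep running for the pod's whole lifetime, so their exclusive CPUs are summed together with the app containers' instead of being released the way a regular init container's would be.

```go
package main

import "fmt"

// concurrentCPURequest sums the cores held at the same time: sidecar
// allocations persist for the pod lifetime and app containers run alongside
// them, so neither set can reuse the other's CPUs.
func concurrentCPURequest(restartableInit, app []int64) int64 {
	var total int64
	for _, r := range restartableInit {
		total += r
	}
	for _, r := range app {
		total += r
	}
	return total
}

func main() {
	// A restartable init container demanding all cores (say 8) plus a 1-core
	// app container can never be admitted: 8 + 1 > 8.
	fmt.Println(concurrentCPURequest([]int64{8}, []int64{1})) // 9
}
```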