Merge pull request #116846 from moshe010/e2e--node-pod-resources

kubelet pod-resources: add e2e for KubeletPodResourcesGet feature
Kubernetes Prow Robot 2023-07-11 04:53:24 -07:00 committed by GitHub
commit 86038ae590

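For orientation before the diff: the KubeletPodResourcesGet feature gate guards a new Get endpoint on the kubelet pod-resources API. A minimal sketch of how a client exercises it, assembled from the calls this patch uses (the constants defaultPodResourcesPath, defaultPodResourcesTimeout, and defaultPodResourcesMaxSize are file-level constants of the test file; getSinglePodResources itself is a hypothetical helper, not part of the patch):

func getSinglePodResources(ctx context.Context, podName, podNamespace string) (*kubeletpodresourcesv1.PodResources, error) {
    endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
    if err != nil {
        return nil, err
    }
    cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
    if err != nil {
        return nil, err
    }
    defer conn.Close()
    // Unlike List, which walks every pod known to the kubelet,
    // Get returns the resources allocated to one named pod.
    resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{
        PodName:      podName,
        PodNamespace: podNamespace,
    })
    if err != nil {
        return nil, err
    }
    return resp.GetPodResources(), nil
}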

@@ -141,14 +141,9 @@ func logPodResources(podIdx int, pr *kubeletpodresourcesv1.PodResources) {
 type podResMap map[string]map[string]kubeletpodresourcesv1.ContainerResources
-func getPodResourcesValues(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient) (podResMap, error) {
-    resp, err := cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
-    if err != nil {
-        return nil, err
-    }
+func convertToMap(podsResources []*kubeletpodresourcesv1.PodResources) podResMap {
     res := make(map[string]map[string]kubeletpodresourcesv1.ContainerResources)
-    for idx, podResource := range resp.GetPodResources() {
+    for idx, podResource := range podsResources {
         // to make troubleshooting easier
         logPodResources(idx, podResource)
@@ -158,7 +153,15 @@ func getPodResourcesValues(ctx context.Context, cli kubeletpodresourcesv1.PodRes
         }
         res[podResource.GetName()] = cnts
     }
-    return res, nil
+    return res
 }
+
+func getPodResourcesValues(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient) (podResMap, error) {
+    resp, err := cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
+    if err != nil {
+        return nil, err
+    }
+    return convertToMap(resp.GetPodResources()), nil
+}
 
 type testPodData struct {
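The hunk above splits the old getPodResourcesValues into a pure conversion step (convertToMap) plus a thin List wrapper, so the Get tests added later can reuse the same conversion. A sketch of that reuse, mirroring calls that appear elsewhere in this diff (cli, ctx, and f as in the tests):

listResp, err := cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
framework.ExpectNoError(err, "List() failed err: %v", err)
fromList := convertToMap(listResp.GetPodResources())

// A Get response carries a single PodResources, so it is wrapped
// in a one-element slice before conversion.
getResp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-00", PodNamespace: f.Namespace.Name})
framework.ExpectNoError(err, "Get() failed err: %v", err)
fromGet := convertToMap([]*kubeletpodresourcesv1.PodResources{getResp.GetPodResources()})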
@@ -293,7 +296,7 @@ func podresourcesListTests(ctx context.Context, f *framework.Framework, cli kube
     var err error
 
     ginkgo.By("checking the output when no pods are present")
     found, err = getPodResourcesValues(ctx, cli)
-    framework.ExpectNoError(err)
+    framework.ExpectNoError(err, "getPodResourcesValues() failed err: %v", err)
     gomega.ExpectWithOffset(1, found).To(gomega.HaveLen(expectedBasePods), "base pod expectation mismatch")
     tpd = newTestPodData()
@@ -568,6 +571,53 @@ func podresourcesGetAllocatableResourcesTests(ctx context.Context, cli kubeletpo
     }
 }
+
+func podresourcesGetTests(ctx context.Context, f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient) {
+    ginkgo.By("checking the output when no pods are present")
+    expected := []podDesc{}
+    resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
+    framework.ExpectError(err, "pod not found")
+    podResourceList := []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
+    res := convertToMap(podResourceList)
+    err = matchPodDescWithResources(expected, res)
+    framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
+
+    tpd := newTestPodData()
+    ginkgo.By("checking the output when only pods which don't require resources are present")
+    expected = []podDesc{
+        {
+            podName: "pod-00",
+            cntName: "cnt-00",
+        },
+    }
+    tpd.createPodsForTest(ctx, f, expected)
+    resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-00", PodNamespace: f.Namespace.Name})
+    framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-00")
+    podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
+    res = convertToMap(podResourceList)
+    err = matchPodDescWithResources(expected, res)
+    framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
+    tpd.deletePodsForTest(ctx, f)
+
+    tpd = newTestPodData()
+    ginkgo.By("checking the output when only a pod requiring CPU is present")
+    expected = []podDesc{
+        {
+            podName:    "pod-01",
+            cntName:    "cnt-00",
+            cpuRequest: 2000,
+        },
+    }
+    tpd.createPodsForTest(ctx, f, expected)
+    resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
+    framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
+    podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
+    res = convertToMap(podResourceList)
+    err = matchPodDescWithResources(expected, res)
+    framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
+    tpd.deletePodsForTest(ctx, f)
+}
+
 // Serial because the test updates kubelet configuration.
 var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:PodResources]", func() {
     f := framework.NewDefaultFramework("podresources-test")
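podresourcesGetTests above validates Get indirectly, through convertToMap and matchPodDescWithResources. When debugging a failure, the response can also be inspected directly via the generated getters of the kubelet podresources v1 API; a sketch, assuming the same cli and ctx as in the tests:

resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
if err != nil {
    framework.Logf("Get() failed: %v", err)
    return
}
pr := resp.GetPodResources()
for _, cnt := range pr.GetContainers() {
    // CpuIds carries the exclusively allocated CPUs, e.g. under the static CPU manager policy.
    framework.Logf("pod %s/%s container %s cpuIds=%v", pr.GetNamespace(), pr.GetName(), cnt.GetName(), cnt.GetCpuIds())
}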
@@ -606,7 +656,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
     ginkgo.It("should return the expected responses", func(ctx context.Context) {
         onlineCPUs, err := getOnlineCPUs()
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
         configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
         sd := setupSRIOVConfigOrFail(ctx, f, configMap)
@@ -615,10 +665,10 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
         waitForSRIOVResources(ctx, f, sd)
         endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
         cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
         defer conn.Close()
         waitForSRIOVResources(ctx, f, sd)
@@ -644,10 +694,10 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
         waitForSRIOVResources(ctx, f, sd)
         endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
         cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
         defer conn.Close()
         waitForSRIOVResources(ctx, f, sd)
@@ -686,21 +736,26 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
         cpus := reservedSystemCPUs.String()
         framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
         initialConfig.ReservedSystemCPUs = cpus
+        if initialConfig.FeatureGates == nil {
+            initialConfig.FeatureGates = make(map[string]bool)
+        }
+        initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGet)] = true
     })
     ginkgo.It("should return the expected responses", func(ctx context.Context) {
         onlineCPUs, err := getOnlineCPUs()
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
         endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
         cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
         defer conn.Close()
         podresourcesListTests(ctx, f, cli, nil)
         podresourcesGetAllocatableResourcesTests(ctx, cli, nil, onlineCPUs, reservedSystemCPUs)
+        podresourcesGetTests(ctx, f, cli)
     })
 })
 })
@@ -708,10 +763,10 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
     ginkgo.Context("with CPU manager None policy", func() {
         ginkgo.It("should return the expected responses", func(ctx context.Context) {
             endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
             cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
             defer conn.Close()
             // intentionally passing empty cpuset instead of onlineCPUs because with none policy
@@ -730,10 +785,10 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
         ginkgo.It("should return the expected error with the feature gate disabled", func(ctx context.Context) {
             endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
             cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
             defer conn.Close()
             ginkgo.By("checking GetAllocatableResources fails if the feature gate is not enabled")
@@ -742,6 +797,23 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
             framework.ExpectError(err, "With feature gate disabled, the call must fail")
         })
     })
+
+    ginkgo.Context("with disabled KubeletPodResourcesGet feature gate", func() {
+        ginkgo.It("should return the expected error with the feature gate disabled", func(ctx context.Context) {
+            endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
+            framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
+            cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
+            framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
+            defer conn.Close()
+
+            ginkgo.By("checking Get fails if the feature gate is not enabled")
+            getRes, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
+            framework.Logf("Get result: %v, err: %v", getRes, err)
+            framework.ExpectError(err, "With feature gate disabled, the call must fail")
+        })
+    })
 })
 ginkgo.Context("with a topology-unaware device plugin, which reports resources w/o hardware topology", func() {
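The new disabled-gate Context mirrors the existing GetAllocatableResources one: only the presence of an error is asserted, not its exact text or gRPC code, which this patch does not pin down. An equivalent assertion could also be written with gomega directly (a sketch, same cli and ctx as above):

_, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
gomega.Expect(err).To(gomega.HaveOccurred(), "Get must fail while KubeletPodResourcesGet is disabled")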
@@ -776,10 +848,10 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
         waitForTopologyUnawareResources(ctx, f)
         endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
         cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
         defer conn.Close()
         ginkgo.By("checking that List reports the topology-unaware resource without topology")
@@ -824,20 +896,45 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 })
 ginkgo.Context("when querying /metrics [NodeConformance]", func() {
+    tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+        if initialConfig.FeatureGates == nil {
+            initialConfig.FeatureGates = make(map[string]bool)
+        }
+        initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGet)] = true
+    })
     ginkgo.BeforeEach(func(ctx context.Context) {
         // ensure APIs have been called at least once
         endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
         cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
         defer conn.Close()
         _, err = cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "List() failed err %v", err)
         _, err = cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetAllocatableResources() failed err %v", err)
+
+        desc := podDesc{
+            podName: "pod-01",
+            cntName: "cnt-01",
+        }
+        tpd := newTestPodData()
+        tpd.createPodsForTest(ctx, f, []podDesc{
+            desc,
+        })
+        expectPodResources(ctx, 1, cli, []podDesc{desc})
+
+        expected := []podDesc{}
+        resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
+        framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
+        podResourceList := []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
+        res := convertToMap(podResourceList)
+        err = matchPodDescWithResources(expected, res)
+        framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
+        tpd.deletePodsForTest(ctx, f)
     })
     ginkgo.It("should report the values for the podresources metrics", func(ctx context.Context) {
@@ -855,6 +952,9 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
         "kubelet_pod_resources_endpoint_requests_get_allocatable": gstruct.MatchAllElements(nodeID, gstruct.Elements{
             "": timelessSampleAtLeast(1),
         }),
+        "kubelet_pod_resources_endpoint_requests_get": gstruct.MatchAllElements(nodeID, gstruct.Elements{
+            "": timelessSampleAtLeast(1),
+        }),
         // not checking errors: the calls don't have non-catastrophic (e.g. out of memory) error conditions yet.
     })
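The new kubelet_pod_resources_endpoint_requests_get sample assertion depends on the BeforeEach above: creating pod-01 and issuing one Get call guarantees the counter has at least one sample by the time the metrics are scraped, just as the existing List and GetAllocatableResources calls do for their counters, and the added tempSetCurrentKubeletConfig block ensures the gate is enabled so the Get call actually succeeds.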
@@ -869,11 +969,11 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
     ginkgo.It("should hit throttling when calling podresources List in a tight loop", func(ctx context.Context) {
         // ensure APIs have been called at least once
         endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
         ginkgo.By("Connecting to the kubelet endpoint")
         cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
         defer conn.Close()
         tries := podresourcesgrpc.DefaultQPS * 2 // This should also be greater than DefaultBurstTokens
@@ -943,7 +1043,7 @@ func setupSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework)
     if err != nil {
        framework.Logf("Sample Device Pod %v took too long to enter running/ready: %v", dp.Name, err)
     }
-    framework.ExpectNoError(err)
+    framework.ExpectNoError(err, "WaitForPodCondition() failed err: %v", err)
     return dpPod
 }
@@ -956,7 +1056,7 @@ func teardownSampleDevicePluginOrFail(ctx context.Context, f *framework.Framewor
     ginkgo.By(fmt.Sprintf("Delete sample device plugin pod %s/%s", pod.Namespace, pod.Name))
     err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, deleteOptions)
-    framework.ExpectNoError(err)
+    framework.ExpectNoError(err, "Failed to delete Pod %v in Namespace %v", pod.Name, pod.Namespace)
     waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
 }