Mirror of https://github.com/k3s-io/kubernetes.git (synced 2026-02-21 22:57:15 +00:00)
Merge pull request #136480 from rogowski-piotr/automated-cherry-pick-of-#135919-upstream-release-1.34
Automated cherry pick of #135919: kubelet(dra): fix handling of multiple ResourceClaims when one is already prepared
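As a quick orientation before the diff: the bug fixed by #135919 is a control-flow issue in the kubelet's DRA manager. While looping over a pod's ResourceClaims, an early "return nil" on a claim that was already prepared ended preparation for the whole pod, silently skipping any remaining unprepared claims; the fix replaces it with "continue" so only the already-prepared claim is skipped. Below is a minimal standalone Go sketch of that pattern; the claim type and prepareAll helper are illustrative stand-ins, not the kubelet's actual DRA types.

// Minimal sketch of the control-flow change in this cherry-pick (hypothetical
// types, not the kubelet's): an early "return nil" on an already-prepared claim
// would abort the loop and skip the remaining claims, while "continue" only
// skips the prepared one.
package main

import "fmt"

type claim struct {
	name     string
	prepared bool
}

// prepareAll mirrors the fixed behavior: skip claims that are already
// prepared and keep going, so every new claim still gets prepared.
func prepareAll(claims []*claim) error {
	for _, c := range claims {
		if c.prepared {
			fmt.Printf("claim %s already prepared, continuing\n", c.name)
			continue // the bug was an early "return nil" here
		}
		c.prepared = true
		fmt.Printf("prepared claim %s\n", c.name)
	}
	return nil
}

func main() {
	claims := []*claim{
		{name: "first", prepared: true}, // prepared by a previous pod
		{name: "second"},                // new claim for this pod
	}
	if err := prepareAll(claims); err != nil {
		fmt.Println("error:", err)
	}
}

The unit and e2e tests added below exercise exactly this scenario: a second pod that reuses an already-prepared claim and adds a new one.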
@@ -359,10 +359,10 @@ func (m *Manager) prepareResources(ctx context.Context, pod *v1.Pod) error {
 				return fmt.Errorf("checkpoint ResourceClaim cache: %w", err)
 			}
 
-			// If this claim is already prepared, there is no need to prepare it again.
+			// If this claim is already prepared, continue preparing for any remaining claims.
 			if claimInfo.isPrepared() {
 				logger.V(5).Info("Resources already prepared", "pod", klog.KObj(pod), "podClaim", podClaim.Name, "claim", klog.KObj(resourceClaim))
-				return nil
+				continue
 			}
 
 			// This saved claim will be used to update ClaimInfo cache
@@ -61,6 +61,10 @@ const (
 	containerName = "test-container"
 )
 
+var (
+	testPodCounter atomic.Uint32
+)
+
 type fakeDRADriverGRPCServer struct {
 	drapb.UnimplementedDRAPluginServer
 	drahealthv1alpha1.UnimplementedDRAResourceHealthServer
@@ -276,6 +280,23 @@ func setupFakeDRADriverGRPCServer(ctx context.Context, shouldTimeout bool, plugi
 	}, nil
 }
 
+func genPrepareResourcesResponse(claimUID types.UID) *drapb.NodePrepareResourcesResponse {
+	return &drapb.NodePrepareResourcesResponse{
+		Claims: map[string]*drapb.NodePrepareResourceResponse{
+			string(claimUID): {
+				Devices: []*drapb.Device{
+					{
+						PoolName:     poolName,
+						DeviceName:   deviceName,
+						RequestNames: []string{requestName},
+						CDIDeviceIDs: []string{cdiID},
+					},
+				},
+			},
+		},
+	}
+}
+
 func TestNewManagerImpl(t *testing.T) {
 	kubeClient := fake.NewSimpleClientset()
 	for _, test := range []struct {
@@ -339,6 +360,45 @@ func genTestPod() *v1.Pod {
 	}
 }
 
+func genTestPodWithClaims(claimNames ...string) *v1.Pod {
+	podCounter := testPodCounter.Add(1)
+
+	podName := fmt.Sprintf("test-pod-%d", podCounter)
+	podUID := types.UID(fmt.Sprintf("test-pod-uid-%d", podCounter))
+
+	resourceClaims := make([]v1.PodResourceClaim, 0, len(claimNames))
+	containerClaims := make([]v1.ResourceClaim, 0, len(claimNames))
+
+	for _, claimName := range claimNames {
+		cn := claimName
+		resourceClaims = append(resourceClaims, v1.PodResourceClaim{
+			Name:              cn,
+			ResourceClaimName: &cn,
+		})
+		containerClaims = append(containerClaims, v1.ResourceClaim{
+			Name: cn,
+		})
+	}
+
+	return &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      podName,
+			Namespace: namespace,
+			UID:       podUID,
+		},
+		Spec: v1.PodSpec{
+			ResourceClaims: resourceClaims,
+			Containers: []v1.Container{
+				{
+					Resources: v1.ResourceRequirements{
+						Claims: containerClaims,
+					},
+				},
+			},
+		},
+	}
+}
+
 // genTestPodWithExtendedResource generates pod object
 func genTestPodWithExtendedResource() *v1.Pod {
 	return &v1.Pod{
@@ -692,18 +752,7 @@ func TestPrepareResources(t *testing.T) {
 			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
 			expectedPrepareCalls:   1,
 			expectedClaimInfoState: genClaimInfoState(cdiID),
-			resp: &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{
-				string(claimUID): {
-					Devices: []*drapb.Device{
-						{
-							PoolName:     poolName,
-							DeviceName:   deviceName,
-							RequestNames: []string{requestName},
-							CDIDeviceIDs: []string{cdiID},
-						},
-					},
-				},
-			}},
+			resp:                   genPrepareResourcesResponse(claimUID),
 		},
 		{
 			description: "resource already prepared",
@@ -712,18 +761,7 @@ func TestPrepareResources(t *testing.T) {
 			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
 			claimInfo:              genTestClaimInfo(claimUID, []string{podUID}, true),
 			expectedClaimInfoState: genClaimInfoState(cdiID),
-			resp: &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{
-				string(claimUID): {
-					Devices: []*drapb.Device{
-						{
-							PoolName:     poolName,
-							DeviceName:   deviceName,
-							RequestNames: []string{requestName},
-							CDIDeviceIDs: []string{cdiID},
-						},
-					},
-				},
-			}},
+			resp:                   genPrepareResourcesResponse(claimUID),
 		},
 		{
 			description: "should timeout",
@@ -740,19 +778,8 @@ func TestPrepareResources(t *testing.T) {
 			pod:                    genTestPod(),
 			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
 			expectedClaimInfoState: genClaimInfoState(cdiID),
-			resp: &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{
-				string(claimUID): {
-					Devices: []*drapb.Device{
-						{
-							PoolName:     poolName,
-							DeviceName:   deviceName,
-							RequestNames: []string{requestName},
-							CDIDeviceIDs: []string{cdiID},
-						},
-					},
-				},
-			}},
-			expectedPrepareCalls: 1,
+			resp:                 genPrepareResourcesResponse(claimUID),
+			expectedPrepareCalls: 1,
 		},
 		{
 			description: "should prepare extended resource claim backed by DRA",
@@ -863,6 +890,74 @@ func TestPrepareResources(t *testing.T) {
 		}
 	}
 
+// TestPrepareResourcesWithPreparedAndNewClaim verifies that PrepareResources
+// correctly handles a pod that references a mix of ResourceClaims:
+// - first claim already prepared by a previous pod
+// - second claim is new and needs to be prepared
+func TestPrepareResourcesWithPreparedAndNewClaim(t *testing.T) {
+	logger, tCtx := ktesting.NewTestContext(t)
+	fakeKubeClient := fake.NewClientset()
+
+	manager, err := NewManager(logger, fakeKubeClient, t.TempDir())
+	require.NoError(t, err)
+	manager.initDRAPluginManager(tCtx, getFakeNode, time.Second)
+
+	secondClaimName := fmt.Sprintf("%s-second", claimName)
+
+	// Generate two pods where the second pod reuses an existing claim and adds a new one
+	firstPod := genTestPodWithClaims(claimName)
+	secondPod := genTestPodWithClaims(claimName, secondClaimName)
+
+	firstClaim := genTestClaim(claimName, driverName, deviceName, string(firstPod.UID))
+	secondClaim := genTestClaim(secondClaimName, driverName, deviceName, string(secondPod.UID))
+
+	// Make firstClaim reserved for first and second pod
+	firstClaim.Status.ReservedFor = append(
+		firstClaim.Status.ReservedFor,
+		resourceapi.ResourceClaimConsumerReference{UID: secondPod.UID},
+	)
+
+	_, err = fakeKubeClient.ResourceV1().ResourceClaims(namespace).Create(tCtx, firstClaim, metav1.CreateOptions{})
+	require.NoError(t, err)
+
+	_, err = fakeKubeClient.ResourceV1().ResourceClaims(namespace).Create(tCtx, secondClaim, metav1.CreateOptions{})
+	require.NoError(t, err)
+
+	respFirst := genPrepareResourcesResponse(firstClaim.UID)
+	draServerInfo, err := setupFakeDRADriverGRPCServer(tCtx, false, nil, respFirst, nil, nil)
+	require.NoError(t, err)
+	defer draServerInfo.teardownFn()
+
+	plg := manager.GetWatcherHandler()
+	require.NoError(t, plg.RegisterPlugin(driverName, draServerInfo.socketName, []string{drapb.DRAPluginService}, nil))
+
+	err = manager.PrepareResources(tCtx, firstPod)
+	require.NoError(t, err)
+
+	assert.Equal(t, uint32(1),
+		draServerInfo.server.prepareResourceCalls.Load(),
+		"first pod should trigger one prepare call",
+	)
+
+	respSecond := genPrepareResourcesResponse(secondClaim.UID)
+	draServerInfo.server.prepareResourcesResponse = respSecond
+
+	err = manager.PrepareResources(tCtx, secondPod)
+	require.NoError(t, err)
+
+	// second pod triggered exactly one prepare call (new claim only) + previous one call
+	assert.Equal(t, uint32(2),
+		draServerInfo.server.prepareResourceCalls.Load(),
+		"second pod should trigger one prepare call for the new claim",
+	)
+
+	for _, claimName := range []string{firstClaim.Name, secondClaim.Name} {
+		claimInfo, exists := manager.cache.get(claimName, namespace)
+		require.True(t, exists, "claim %s should exist in cache", claimName)
+		assert.True(t, claimInfo.prepared, "claim %s should be marked as prepared", claimName)
+	}
+}
+
 func TestUnprepareResources(t *testing.T) {
 	fakeKubeClient := fake.NewSimpleClientset()
 	for _, test := range []struct {
@@ -942,6 +942,50 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
 		}
 	}
 
+	singleNodeMultipleClaimsTests := func() {
+		nodes := drautils.NewNodes(f, 1, 1)
+		// Allow allocating more than one device so that multiple claims can be prepared on the same node.
+		maxAllocations := 2
+		driver := drautils.NewDriver(f, nodes, drautils.DriverResources(maxAllocations)) // All tests get their own driver instance.
+		driver.WithKubelet = true
+		b := drautils.NewBuilder(f, driver)
+
+		ginkgo.It("requests an already allocated and a new claim for a pod", func(ctx context.Context) {
+			// This test covers a situation when a pod references a mix of already-prepared and new claims.
+			firstClaim := b.ExternalClaim()
+			secondClaim := b.ExternalClaim()
+
+			b.Create(ctx, firstClaim, secondClaim)
+
+			// First pod uses only firstClaim
+			firstPod := b.PodExternal()
+			b.Create(ctx, firstPod)
+			b.TestPod(ctx, f, firstPod)
+
+			// Second pod uses firstClaim (already prepared) + secondClaim (new)
+			secondPod := b.PodExternal()
+
+			secondPod.Spec.ResourceClaims = []v1.PodResourceClaim{
+				{
+					Name:              "first",
+					ResourceClaimName: &firstClaim.Name,
+				},
+				{
+					Name:              "second",
+					ResourceClaimName: &secondClaim.Name,
+				},
+			}
+
+			secondPod.Spec.Containers[0].Resources.Claims = []v1.ResourceClaim{
+				{Name: "first"},
+				{Name: "second"},
+			}
+
+			b.Create(ctx, secondPod)
+			b.TestPod(ctx, f, secondPod)
+		})
+	}
+
 	// The following tests only make sense when there is more than one node.
 	// They get skipped when there's only one node.
 	multiNodeTests := func(withKubelet bool) {
@@ -1806,6 +1850,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
 
 	framework.Context("control plane", framework.WithLabel("ConformanceCandidate") /* TODO: replace with framework.WithConformance() */, func() { singleNodeTests(false) })
 	framework.Context("kubelet", feature.DynamicResourceAllocation, "on single node", func() { singleNodeTests(true) })
+	framework.Context("kubelet", feature.DynamicResourceAllocation, "on single node with multiple claims allocation", singleNodeMultipleClaimsTests)
 
 	framework.Context("control plane", framework.WithLabel("ConformanceCandidate") /* TODO: replace with framework.WithConformance() */, func() { multiNodeTests(false) })
 	framework.Context("kubelet", feature.DynamicResourceAllocation, "on multiple nodes", func() { multiNodeTests(true) })