DRA resource claim controller: add statistics

This is primarily for testing. Proper metrics might be useful, but can still be
added later.
Patrick Ohly 2024-10-27 13:36:12 +01:00
parent d94752ebc8
commit e88d5c37e6
2 changed files with 83 additions and 3 deletions

View File

@@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/google/go-cmp/cmp"
@@ -66,6 +67,11 @@ type Controller struct {
queue workqueue.TypedRateLimitingInterface[string]
sliceStore cache.Indexer
// Must use atomic access...
numCreates int64
numUpdates int64
numDeletes int64
mutex sync.RWMutex
// When receiving updates from the driver, the entire pointer replaced,
@@ -209,6 +215,25 @@ func (c *Controller) Update(resources *DriverResources) {
}
}
// GetStats provides some insights into operations of the controller.
func (c *Controller) GetStats() Stats {
s := Stats{
NumCreates: atomic.LoadInt64(&c.numCreates),
NumUpdates: atomic.LoadInt64(&c.numUpdates),
NumDeletes: atomic.LoadInt64(&c.numDeletes),
}
return s
}
type Stats struct {
// NumCreates counts the number of ResourceSlices that got created.
NumCreates int64
// NumUpdates counts the number of ResourceSlices that got updated.
NumUpdates int64
// NumDeletes counts the number of ResourceSlices that got deleted.
NumDeletes int64
}
// newController creates a new controller.
func newController(ctx context.Context, options Options) (*Controller, error) {
if options.KubeClient == nil {
@@ -540,6 +565,7 @@ func (c *Controller) syncPool(ctx context.Context, poolName string) error {
if _, err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("update resource slice: %w", err)
}
atomic.AddInt64(&c.numUpdates, 1)
}
// Create new slices.
@@ -596,6 +622,7 @@ func (c *Controller) syncPool(ctx context.Context, poolName string) error {
if _, err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{}); err != nil {
return fmt.Errorf("create resource slice: %w", err)
}
atomic.AddInt64(&c.numCreates, 1)
}
} else if len(slices) > 0 {
// All are obsolete, pool does not exist anymore.
@@ -620,7 +647,13 @@ func (c *Controller) syncPool(ctx context.Context, poolName string) error {
// changes on the server. The only downside is the extra API
// call. This isn't as bad as extra creates.
logger.V(5).Info("Deleting obsolete resource slice", "slice", klog.KObj(slice), "deleteOptions", options)
if err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Delete(ctx, slice.Name, options); err != nil && !apierrors.IsNotFound(err) {
err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Delete(ctx, slice.Name, options)
switch {
case err == nil:
atomic.AddInt64(&c.numDeletes, 1)
case apierrors.IsNotFound(err):
// okay
default:
return fmt.Errorf("delete resource slice: %w", err)
}
}

View File

@@ -96,6 +96,7 @@ func TestControllerSyncPool(t *testing.T) {
initialOtherObjects []runtime.Object
inputDriverResources *DriverResources
expectedResourceSlices []resourceapi.ResourceSlice
expectedStats Stats
}{
"create-slice": {
nodeUID: nodeUID,
@@ -107,6 +108,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumCreates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(generatedName1).GenerateName(generateName).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -145,7 +149,10 @@ func TestControllerSyncPool(t *testing.T) {
Driver(driverName).Devices([]resourceapi.Device{}).
Pool(resourceapi.ResourcePool{Name: poolName, Generation: 1, ResourceSliceCount: 1}).Obj(),
},
inputDriverResources: &DriverResources{},
inputDriverResources: &DriverResources{},
expectedStats: Stats{
NumDeletes: 1,
},
expectedResourceSlices: nil,
},
"delete-and-add-slice": {
@@ -163,6 +170,10 @@ func TestControllerSyncPool(t *testing.T) {
Slices: []Slice{{Devices: []resourceapi.Device{{Name: deviceName}}}}},
},
},
expectedStats: Stats{
NumDeletes: 1,
NumCreates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(generatedName1).GenerateName(generateName).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -188,6 +199,9 @@ func TestControllerSyncPool(t *testing.T) {
Slices: []Slice{{Devices: []resourceapi.Device{{Name: deviceName}}}}},
},
},
expectedStats: Stats{
NumDeletes: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -215,6 +229,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumUpdates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).ResourceVersion("1").
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -248,6 +265,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumUpdates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).ResourceVersion("1").
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -334,6 +354,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumDeletes: 2,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice2).UID(resourceSlice2).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -369,6 +392,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
// Generation not bumped, only one update.
expectedStats: Stats{
NumUpdates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -412,6 +438,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
// Generation bumped, all updated.
expectedStats: Stats{
NumUpdates: 3,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).ResourceVersion("1").
@@ -454,6 +483,10 @@ func TestControllerSyncPool(t *testing.T) {
},
},
// Generation bumped, two updated, one removed.
expectedStats: Stats{
NumUpdates: 2,
NumDeletes: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).ResourceVersion("1").
@@ -493,7 +526,10 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
// Three updated, one generated.
expectedStats: Stats{
NumUpdates: 3,
NumCreates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).ResourceVersion("1").
NodeOwnerReferences(ownerName, string(nodeUID)).NodeName(ownerName).
@@ -523,6 +559,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumCreates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(driverName + "-0").GenerateName(driverName + "-").
AllNodes(true).
@@ -540,6 +579,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumCreates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(generatedName1).GenerateName(generateName).
AppOwnerReferences(ownerName).NodeSelector(nodeSelector).
@@ -562,6 +604,9 @@ func TestControllerSyncPool(t *testing.T) {
},
},
},
expectedStats: Stats{
NumUpdates: 1,
},
expectedResourceSlices: []resourceapi.ResourceSlice{
*MakeResourceSlice().Name(resourceSlice1).UID(resourceSlice1).ResourceVersion("1").
AppOwnerReferences(ownerName).NodeSelector(otherNodeSelector).
@@ -626,6 +671,8 @@ func TestControllerSyncPool(t *testing.T) {
sortResourceSlices(resourceSlices.Items)
assert.Equal(t, test.expectedResourceSlices, resourceSlices.Items)
assert.Equal(t, test.expectedStats, ctrl.GetStats())
// The informer might have added a work item after ctrl.run returned.
state := queue.State()
state.Ready = nil