kubelet: assign Node as an owner for the ResourceSlice

Co-authored-by: Patrick Ohly <patrick.ohly@intel.com>
Ed Bartosh 2024-03-13 01:04:57 +02:00
parent fa07055b1f
commit 26881132bd
6 changed files with 76 additions and 21 deletions
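In short: every ResourceSlice that the kubelet publishes now carries a controller owner reference pointing at its Node, so the slices are garbage-collected when the Node object is deleted. The following compilable sketch (outside the kubelet tree; nodeOwnedSlice is an illustrative name, and the v1alpha2 resource API is assumed) shows the shape of that owner reference as it appears in the diff below:

package sketch

import (
	v1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
)

// nodeOwnedSlice builds a ResourceSlice whose controlling owner is the given
// Node, mirroring what the node resources controller does in this commit.
func nodeOwnedSlice(node *v1.Node, driverName string) *resourceapi.ResourceSlice {
	return &resourceapi.ResourceSlice{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: node.Name + "-" + driverName + "-",
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: "v1", // core group
				Kind:       "Node",
				Name:       node.Name,
				UID:        node.UID,
				Controller: ptr.To(true),
			}},
		},
		NodeName:   node.Name,
		DriverName: driverName,
		// The real controller also sets ResourceModel from the plugin's response.
	}
}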

View File

@@ -306,6 +306,10 @@ func TestGetResources(t *testing.T) {
}
}
func getFakeNode() (*v1.Node, error) {
return &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker"}}, nil
}
func TestPrepareResources(t *testing.T) {
fakeKubeClient := fake.NewSimpleClientset()
@@ -760,7 +764,7 @@ func TestPrepareResources(t *testing.T) {
}
defer draServerInfo.teardownFn()
plg := plugin.NewRegistrationHandler(nil, "worker")
plg := plugin.NewRegistrationHandler(nil, getFakeNode)
if err := plg.RegisterPlugin(test.driverName, draServerInfo.socketName, []string{"1.27"}); err != nil {
t.Fatalf("failed to register plugin %s, err: %v", test.driverName, err)
}
@@ -1060,7 +1064,7 @@ func TestUnprepareResources(t *testing.T) {
}
defer draServerInfo.teardownFn()
plg := plugin.NewRegistrationHandler(nil, "worker")
plg := plugin.NewRegistrationHandler(nil, getFakeNode)
if err := plg.RegisterPlugin(test.driverName, draServerInfo.socketName, []string{"1.27"}); err != nil {
t.Fatalf("failed to register plugin %s, err: %v", test.driverName, err)
}

View File

@@ -28,6 +28,7 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1alpha2"
apiequality "k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -39,6 +40,7 @@ import (
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
"k8s.io/utils/ptr"
)
const (
@@ -52,7 +54,7 @@
type nodeResourcesController struct {
ctx context.Context
kubeClient kubernetes.Interface
nodeName string
getNode func() (*v1.Node, error)
wg sync.WaitGroup
queue workqueue.RateLimitingInterface
sliceStore cache.Store
@@ -84,7 +86,7 @@ type activePlugin struct {
// the controller is inactive. This can happen when kubelet is run stand-alone
// without an apiserver. In that case we can't and don't need to publish
// ResourceSlices.
func startNodeResourcesController(ctx context.Context, kubeClient kubernetes.Interface, nodeName string) *nodeResourcesController {
func startNodeResourcesController(ctx context.Context, kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *nodeResourcesController {
if kubeClient == nil {
return nil
}
@@ -96,7 +98,7 @@ func startNodeResourcesController(ctx context.Context, kubeClient kubernetes.Int
c := &nodeResourcesController{
ctx: ctx,
kubeClient: kubeClient,
nodeName: nodeName,
getNode: getNode,
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_resource_slices"),
activePlugins: make(map[string]*activePlugin),
}
@@ -252,16 +254,29 @@ func (c *nodeResourcesController) run(ctx context.Context) {
// For now syncing starts immediately, with no DeleteCollection. This
// can be reconsidered later.
// While kubelet starts up, there are errors:
// E0226 13:41:19.880621 126334 reflector.go:150] k8s.io/client-go@v0.0.0/tools/cache/reflector.go:232: Failed to watch *v1alpha2.ResourceSlice: failed to list *v1alpha2.ResourceSlice: resourceslices.resource.k8s.io is forbidden: User "system:anonymous" cannot list resource "resourceslices" in API group "resource.k8s.io" at the cluster scope
//
// The credentials used by kubeClient seem to get swapped out later,
// because eventually these list calls succeed.
// TODO (https://github.com/kubernetes/kubernetes/issues/123691): can we avoid these error log entries? Perhaps wait here?
// Wait until we're able to get a Node object.
// This means that the object is created on the API server,
// the kubeClient is functional, and the node informer cache is populated with the Node object.
// Without this it doesn't make sense to proceed further, as this controller needs
// the node name and UID to do its work.
var node *v1.Node
var err error
for {
node, err = c.getNode()
if err == nil {
break
}
logger.V(5).Info("Getting Node object failed, waiting", "err", err)
select {
case <-ctx.Done():
return
case <-time.After(time.Second):
}
}
// We could use an indexer on driver name, but that seems overkill.
informer := resourceinformers.NewFilteredResourceSliceInformer(c.kubeClient, resyncPeriod, nil, func(options *metav1.ListOptions) {
options.FieldSelector = "nodeName=" + c.nodeName
options.FieldSelector = "nodeName=" + node.Name
})
c.sliceStore = informer.GetStore()
handler, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
@@ -441,13 +456,29 @@ func (c *nodeResourcesController) sync(ctx context.Context, driverName string) e
continue
}
// Although the node name and UID are unlikely to change,
// we get the updated Node object just to be on the safe side.
// It's a cheap operation because it reads from the node informer cache.
node, err := c.getNode()
if err != nil {
return fmt.Errorf("retrieve node object: %w", err)
}
// Create a new slice.
slice := &resourceapi.ResourceSlice{
ObjectMeta: metav1.ObjectMeta{
GenerateName: c.nodeName + "-" + driverName + "-",
// TODO (https://github.com/kubernetes/kubernetes/issues/123692): node object as owner
GenerateName: node.Name + "-" + driverName + "-",
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: v1.SchemeGroupVersion.WithKind("Node").Version,
Kind: v1.SchemeGroupVersion.WithKind("Node").Kind,
Name: node.Name,
UID: node.UID,
Controller: ptr.To(true),
},
},
},
NodeName: c.nodeName,
NodeName: node.Name,
DriverName: driverName,
ResourceModel: *resource,
}
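The run() hunk above blocks until the Node object can be read before it starts the ResourceSlice informer, because both the field selector and the owner reference need the node name and UID. As a standalone illustration of that pattern (waitForNode is a hypothetical name, not part of this commit):

package sketch

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
)

// waitForNode calls getNode once per second until it succeeds or the context
// is cancelled, returning either the Node or the context's error.
func waitForNode(ctx context.Context, getNode func() (*v1.Node, error)) (*v1.Node, error) {
	for {
		node, err := getNode()
		if err == nil {
			return node, nil
		}
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-time.After(time.Second):
			// Retry after a short delay instead of busy-looping.
		}
	}
}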

View File

@@ -28,6 +28,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
v1 "k8s.io/api/core/v1"
utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
@@ -104,12 +105,12 @@ type RegistrationHandler struct {
// Must only be called once per process because it manages global state.
// If a kubeClient is provided, then it synchronizes ResourceSlices
// with the resource information provided by plugins.
func NewRegistrationHandler(kubeClient kubernetes.Interface, nodeName string) *RegistrationHandler {
func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *RegistrationHandler {
handler := &RegistrationHandler{}
// If kubelet ever gets an API for stopping registration handlers, then
// that would need to be hooked up with stopping the controller.
handler.controller = startNodeResourcesController(context.TODO(), kubeClient, nodeName)
handler.controller = startNodeResourcesController(context.TODO(), kubeClient, getNode)
return handler
}
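Callers of NewRegistrationHandler now pass a Node getter instead of a plain node name. A minimal sketch of the new call shape, written as if it lived in the same (internal) plugin package; newHandlerForNode and the direct API-server lookup are illustrative only, while the kubelet itself passes getNodeAnyWay (see the kubelet hunk below):

package plugin

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// newHandlerForNode wires NewRegistrationHandler with a getter that fetches
// the Node from the API server on every call.
func newHandlerForNode(kubeClient kubernetes.Interface, nodeName string) *RegistrationHandler {
	getNode := func() (*v1.Node, error) {
		return kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
	}
	return NewRegistrationHandler(kubeClient, getNode)
}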

View File

@@ -20,11 +20,17 @@ import (
"testing"
"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func getFakeNode() (*v1.Node, error) {
return &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker"}}, nil
}
func TestRegistrationHandler_ValidatePlugin(t *testing.T) {
newRegistrationHandler := func() *RegistrationHandler {
return NewRegistrationHandler(nil, "worker")
return NewRegistrationHandler(nil, getFakeNode)
}
for _, test := range []struct {

View File

@@ -1557,7 +1557,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
kl.pluginManager.AddHandler(pluginwatcherapi.CSIPlugin, plugincache.PluginHandler(csi.PluginHandler))
// Adding Registration Callback function for DRA Plugin
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
kl.pluginManager.AddHandler(pluginwatcherapi.DRAPlugin, plugincache.PluginHandler(draplugin.NewRegistrationHandler(kl.kubeClient, kl.hostname)))
kl.pluginManager.AddHandler(pluginwatcherapi.DRAPlugin, plugincache.PluginHandler(draplugin.NewRegistrationHandler(kl.kubeClient, kl.getNodeAnyWay)))
}
// Adding Registration Callback function for Device Manager
kl.pluginManager.AddHandler(pluginwatcherapi.DevicePlugin, kl.containerManager.GetPluginRegistrationHandler())
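kl.getNodeAnyWay already has the func() (*v1.Node, error) signature and prefers the kubelet's node informer cache when a client is available, which keeps the controller's repeated getNode() calls cheap. For a consumer that only has a shared informer, a hypothetical adapter over a NodeLister could look like this (nodeGetterFromLister is illustrative, not part of the commit):

package sketch

import (
	v1 "k8s.io/api/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
)

// nodeGetterFromLister adapts an informer-backed NodeLister to the
// func() (*v1.Node, error) signature that the DRA registration handler and
// node resources controller now expect. Reads come from the local cache,
// not from the API server.
func nodeGetterFromLister(lister corelisters.NodeLister, nodeName string) func() (*v1.Node, error) {
	return func() (*v1.Node, error) {
		return lister.Get(nodeName)
	}
}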

View File

@@ -905,10 +905,23 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
resourceClient := f.ClientSet.ResourceV1alpha2().ResourceSlices()
var expectedObjects []any
for _, nodeName := range nodes.NodeNames {
node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
framework.ExpectNoError(err, "get node")
expectedObjects = append(expectedObjects,
gstruct.MatchAllFields(gstruct.Fields{
"TypeMeta": gstruct.Ignore(),
"ObjectMeta": gstruct.Ignore(), // TODO (https://github.com/kubernetes/kubernetes/issues/123692): validate ownerref
"TypeMeta": gstruct.Ignore(),
"ObjectMeta": gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
"OwnerReferences": gomega.ContainElements(
gstruct.MatchAllFields(gstruct.Fields{
"APIVersion": gomega.Equal("v1"),
"Kind": gomega.Equal("Node"),
"Name": gomega.Equal(nodeName),
"UID": gomega.Equal(node.UID),
"Controller": gomega.Equal(ptr.To(true)),
"BlockOwnerDeletion": gomega.BeNil(),
}),
),
}),
"NodeName": gomega.Equal(nodeName),
"DriverName": gomega.Equal(driver.Name),
"ResourceModel": gomega.Equal(resourcev1alpha2.ResourceModel{NamedResources: &resourcev1alpha2.NamedResourcesResources{