kubelet: assign Node as an owner for the ResourceSlice
Co-authored-by: Patrick Ohly <patrick.ohly@intel.com>
commit 26881132bd
parent fa07055b1f
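The change replaces the plain node name that used to be threaded through the DRA plugin registration code with a getter callback of type func() (*v1.Node, error). That lets the ResourceSlice controller read both the node name and the node UID and set the Node object as the controlling owner of every ResourceSlice it publishes, so the garbage collector can remove the slices once the node is deleted. The kubelet wires in its existing getNodeAnyWay method (see the Kubelet.initializeRuntimeDependentModules hunk near the end of the diff); the sketch below only illustrates the callback shape by adapting a node informer lister, and getNodeFromLister plus the package name are invented for this illustration.

package nodegetter

import (
	v1 "k8s.io/api/core/v1"
	corev1listers "k8s.io/client-go/listers/core/v1"
)

// getNodeFromLister adapts a NodeLister into the func() (*v1.Node, error)
// callback that NewRegistrationHandler now expects. The lister reads from the
// shared informer cache, so calls are cheap and fail until the cache has been
// populated with this node's object.
func getNodeFromLister(lister corev1listers.NodeLister, nodeName string) func() (*v1.Node, error) {
	return func() (*v1.Node, error) {
		return lister.Get(nodeName)
	}
}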
@@ -306,6 +306,10 @@ func TestGetResources(t *testing.T) {
 	}
 }
 
+func getFakeNode() (*v1.Node, error) {
+	return &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker"}}, nil
+}
+
 func TestPrepareResources(t *testing.T) {
 	fakeKubeClient := fake.NewSimpleClientset()
 
@@ -760,7 +764,7 @@ func TestPrepareResources(t *testing.T) {
 			}
 			defer draServerInfo.teardownFn()
 
-			plg := plugin.NewRegistrationHandler(nil, "worker")
+			plg := plugin.NewRegistrationHandler(nil, getFakeNode)
 			if err := plg.RegisterPlugin(test.driverName, draServerInfo.socketName, []string{"1.27"}); err != nil {
 				t.Fatalf("failed to register plugin %s, err: %v", test.driverName, err)
 			}
@@ -1060,7 +1064,7 @@ func TestUnprepareResources(t *testing.T) {
 			}
 			defer draServerInfo.teardownFn()
 
-			plg := plugin.NewRegistrationHandler(nil, "worker")
+			plg := plugin.NewRegistrationHandler(nil, getFakeNode)
 			if err := plg.RegisterPlugin(test.driverName, draServerInfo.socketName, []string{"1.27"}); err != nil {
 				t.Fatalf("failed to register plugin %s, err: %v", test.driverName, err)
 			}
@@ -28,6 +28,7 @@ import (
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 
+	v1 "k8s.io/api/core/v1"
 	resourceapi "k8s.io/api/resource/v1alpha2"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -39,6 +40,7 @@ import (
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/klog/v2"
 	drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
+	"k8s.io/utils/ptr"
 )
 
 const (
@@ -52,7 +54,7 @@ const (
 type nodeResourcesController struct {
 	ctx        context.Context
 	kubeClient kubernetes.Interface
-	nodeName   string
+	getNode    func() (*v1.Node, error)
 	wg         sync.WaitGroup
 	queue      workqueue.RateLimitingInterface
 	sliceStore cache.Store
@@ -84,7 +86,7 @@ type activePlugin struct {
 // the controller is inactive. This can happen when kubelet is run stand-alone
 // without an apiserver. In that case we can't and don't need to publish
 // ResourceSlices.
-func startNodeResourcesController(ctx context.Context, kubeClient kubernetes.Interface, nodeName string) *nodeResourcesController {
+func startNodeResourcesController(ctx context.Context, kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *nodeResourcesController {
 	if kubeClient == nil {
 		return nil
 	}
@@ -96,7 +98,7 @@ func startNodeResourcesController(ctx context.Context, kubeClient kubernetes.Int
 	c := &nodeResourcesController{
 		ctx:           ctx,
 		kubeClient:    kubeClient,
-		nodeName:      nodeName,
+		getNode:       getNode,
 		queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_resource_slices"),
 		activePlugins: make(map[string]*activePlugin),
 	}
@@ -252,16 +254,29 @@ func (c *nodeResourcesController) run(ctx context.Context) {
 	// For now syncing starts immediately, with no DeleteCollection. This
 	// can be reconsidered later.
 
-	// While kubelet starts up, there are errors:
-	// E0226 13:41:19.880621 126334 reflector.go:150] k8s.io/client-go@v0.0.0/tools/cache/reflector.go:232: Failed to watch *v1alpha2.ResourceSlice: failed to list *v1alpha2.ResourceSlice: resourceslices.resource.k8s.io is forbidden: User "system:anonymous" cannot list resource "resourceslices" in API group "resource.k8s.io" at the cluster scope
-	//
-	// The credentials used by kubeClient seem to get swapped out later,
-	// because eventually these list calls succeed.
-	// TODO (https://github.com/kubernetes/kubernetes/issues/123691): can we avoid these error log entries? Perhaps wait here?
+	// Wait until we're able to get a Node object.
+	// This means that the object is created on the API server,
+	// the kubeclient is functional and the node informer cache is populated with the node object.
+	// Without this it doesn't make sense to proceed further as we need a node name and
+	// a node UID for this controller to work.
+	var node *v1.Node
+	var err error
+	for {
+		node, err = c.getNode()
+		if err == nil {
+			break
+		}
+		logger.V(5).Info("Getting Node object failed, waiting", "err", err)
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(time.Second):
+		}
+	}
 
 	// We could use an indexer on driver name, but that seems overkill.
 	informer := resourceinformers.NewFilteredResourceSliceInformer(c.kubeClient, resyncPeriod, nil, func(options *metav1.ListOptions) {
-		options.FieldSelector = "nodeName=" + c.nodeName
+		options.FieldSelector = "nodeName=" + node.Name
 	})
 	c.sliceStore = informer.GetStore()
 	handler, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
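The hand-rolled select/time.After loop keeps the wait logic dependency-free. For comparison, here is a minimal sketch of the same wait expressed with apimachinery's polling helper, assuming the same getNode callback; waitForNode and the package name are invented for this illustration.

package nodewait

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/wait"
)

// waitForNode polls getNode once per second until it succeeds or the context
// is cancelled, mirroring the loop added to run() above.
func waitForNode(ctx context.Context, getNode func() (*v1.Node, error)) (*v1.Node, error) {
	var node *v1.Node
	err := wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) {
		n, getErr := getNode()
		if getErr != nil {
			// Not fatal: the Node object may simply not be in the informer cache yet.
			return false, nil
		}
		node = n
		return true, nil
	})
	return node, err
}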
@@ -441,13 +456,29 @@ func (c *nodeResourcesController) sync(ctx context.Context, driverName string) e
 			continue
 		}
 
+		// Although node name and UID are unlikely to change
+		// we're getting updated node object just to be on the safe side.
+		// It's a cheap operation as it gets an object from the node informer cache.
+		node, err := c.getNode()
+		if err != nil {
+			return fmt.Errorf("retrieve node object: %w", err)
+		}
+
 		// Create a new slice.
 		slice := &resourceapi.ResourceSlice{
 			ObjectMeta: metav1.ObjectMeta{
-				GenerateName: c.nodeName + "-" + driverName + "-",
-				// TODO (https://github.com/kubernetes/kubernetes/issues/123692): node object as owner
+				GenerateName: node.Name + "-" + driverName + "-",
+				OwnerReferences: []metav1.OwnerReference{
+					{
+						APIVersion: v1.SchemeGroupVersion.WithKind("Node").Version,
+						Kind:       v1.SchemeGroupVersion.WithKind("Node").Kind,
+						Name:       node.Name,
+						UID:        node.UID,
+						Controller: ptr.To(true),
+					},
+				},
 			},
-			NodeName:      c.nodeName,
+			NodeName:      node.Name,
 			DriverName:    driverName,
 			ResourceModel: *resource,
 		}
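For reference, metav1.NewControllerRef builds an almost identical owner reference in one call. A sketch follows, with the caveat that NewControllerRef also sets BlockOwnerDeletion to true, whereas the struct literal above leaves it nil (the e2e test at the end of this diff asserts exactly that), so the helper is not a drop-in replacement here; nodeControllerRef and the package name are invented for the illustration.

package ownerref

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeControllerRef returns a controller owner reference pointing at the
// given Node. Unlike the literal in the diff, NewControllerRef also sets
// BlockOwnerDeletion to true.
func nodeControllerRef(node *v1.Node) metav1.OwnerReference {
	return *metav1.NewControllerRef(node, v1.SchemeGroupVersion.WithKind("Node"))
}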
@@ -28,6 +28,7 @@ import (
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/connectivity"
 	"google.golang.org/grpc/credentials/insecure"
+	v1 "k8s.io/api/core/v1"
 	utilversion "k8s.io/apimachinery/pkg/util/version"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
@@ -104,12 +105,12 @@ type RegistrationHandler struct {
 // Must only be called once per process because it manages global state.
 // If a kubeClient is provided, then it synchronizes ResourceSlices
 // with the resource information provided by plugins.
-func NewRegistrationHandler(kubeClient kubernetes.Interface, nodeName string) *RegistrationHandler {
+func NewRegistrationHandler(kubeClient kubernetes.Interface, getNode func() (*v1.Node, error)) *RegistrationHandler {
 	handler := &RegistrationHandler{}
 
 	// If kubelet ever gets an API for stopping registration handlers, then
 	// that would need to be hooked up with stopping the controller.
-	handler.controller = startNodeResourcesController(context.TODO(), kubeClient, nodeName)
+	handler.controller = startNodeResourcesController(context.TODO(), kubeClient, getNode)
 
 	return handler
 }
@@ -20,11 +20,17 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+func getFakeNode() (*v1.Node, error) {
+	return &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker"}}, nil
+}
+
 func TestRegistrationHandler_ValidatePlugin(t *testing.T) {
 	newRegistrationHandler := func() *RegistrationHandler {
-		return NewRegistrationHandler(nil, "worker")
+		return NewRegistrationHandler(nil, getFakeNode)
 	}
 
 	for _, test := range []struct {
@@ -1557,7 +1557,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
 	kl.pluginManager.AddHandler(pluginwatcherapi.CSIPlugin, plugincache.PluginHandler(csi.PluginHandler))
 	// Adding Registration Callback function for DRA Plugin
 	if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
-		kl.pluginManager.AddHandler(pluginwatcherapi.DRAPlugin, plugincache.PluginHandler(draplugin.NewRegistrationHandler(kl.kubeClient, kl.hostname)))
+		kl.pluginManager.AddHandler(pluginwatcherapi.DRAPlugin, plugincache.PluginHandler(draplugin.NewRegistrationHandler(kl.kubeClient, kl.getNodeAnyWay)))
 	}
 	// Adding Registration Callback function for Device Manager
 	kl.pluginManager.AddHandler(pluginwatcherapi.DevicePlugin, kl.containerManager.GetPluginRegistrationHandler())
@@ -905,10 +905,23 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 			resourceClient := f.ClientSet.ResourceV1alpha2().ResourceSlices()
 			var expectedObjects []any
 			for _, nodeName := range nodes.NodeNames {
+				node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
+				framework.ExpectNoError(err, "get node")
 				expectedObjects = append(expectedObjects,
 					gstruct.MatchAllFields(gstruct.Fields{
 						"TypeMeta":   gstruct.Ignore(),
-						"ObjectMeta": gstruct.Ignore(), // TODO (https://github.com/kubernetes/kubernetes/issues/123692): validate ownerref
+						"ObjectMeta": gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
+							"OwnerReferences": gomega.ContainElements(
+								gstruct.MatchAllFields(gstruct.Fields{
+									"APIVersion":         gomega.Equal("v1"),
+									"Kind":               gomega.Equal("Node"),
+									"Name":               gomega.Equal(nodeName),
+									"UID":                gomega.Equal(node.UID),
+									"Controller":         gomega.Equal(ptr.To(true)),
+									"BlockOwnerDeletion": gomega.BeNil(),
+								}),
+							),
+						}),
 						"NodeName":      gomega.Equal(nodeName),
 						"DriverName":    gomega.Equal(driver.Name),
 						"ResourceModel": gomega.Equal(resourcev1alpha2.ResourceModel{NamedResources: &resourcev1alpha2.NamedResourcesResources{
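The gstruct matcher above boils down to: every ResourceSlice published for a node must be controller-owned by that node's UID. Below is a framework-free sketch of the same check against the resource.k8s.io/v1alpha2 client used in this commit; checkSliceOwnership, its callers, and the package name are assumptions for illustration.

package e2echeck

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// checkSliceOwnership lists the ResourceSlices for one node and verifies that
// each of them is controller-owned by that node.
func checkSliceOwnership(ctx context.Context, clientset kubernetes.Interface, nodeName string) error {
	node, err := clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("get node %s: %w", nodeName, err)
	}
	slices, err := clientset.ResourceV1alpha2().ResourceSlices().List(ctx, metav1.ListOptions{
		FieldSelector: "nodeName=" + nodeName,
	})
	if err != nil {
		return fmt.Errorf("list ResourceSlices for node %s: %w", nodeName, err)
	}
	for i := range slices.Items {
		if !metav1.IsControlledBy(&slices.Items[i], node) {
			return fmt.Errorf("ResourceSlice %s is not controller-owned by node %s", slices.Items[i].Name, nodeName)
		}
	}
	return nil
}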