mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 01:40:13 +00:00
Update the sample device plugin to enable the e2e node tests (or any other entity with full access to the node filesystem) to control the registration process. We add a new environment variable `REGISTER_CONTROL_FILE`. The value of this variable must be the path of a file which prevents the plugin from registering itself while the file is present. Once the file is removed, the plugin will go on and complete the registration. The plugin will automatically detect the parent directory in which the file resides and watch it for deletions, unblocking the registration process. If the file is specified but inaccessible, the plugin will fail. If the file is not specified, the registration process will progress as usual and never pause. The plugin will need read access to the parent directory. This feature is useful because it is not possible to control the order in which the pods are recovered after a node reboot/kubelet restart. In this approach, the testing environment will create a directory and then an empty file to pause the registration process of the plugin. Once pointed to that file, the plugin will start and wait for it to be deleted. Only after the directory has been deleted will the plugin proceed to registration. This feature is used in #114640, where an e2e test is implemented to simulate scenarios in which application pods requesting devices come up before the device plugin pod on node reboot/kubelet restart. Co-authored-by: Francesco Romani <fromani@redhat.com> Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
164 lines
4.4 KiB
Go
164 lines
4.4 KiB
Go
/*
|
|
Copyright 2018 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"time"
|
|
|
|
"github.com/fsnotify/fsnotify"
|
|
"k8s.io/klog/v2"
|
|
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
|
plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
|
|
)
|
|
|
|
// resourceName is the resource name handed to the device plugin stub and used
// again when the plugin registers itself.
const resourceName = "example.com/resource"
|
|
|
|
// stubAllocFunc creates and returns allocation response for the input allocate request
|
|
func stubAllocFunc(r *pluginapi.AllocateRequest, devs map[string]pluginapi.Device) (*pluginapi.AllocateResponse, error) {
|
|
var responses pluginapi.AllocateResponse
|
|
for _, req := range r.ContainerRequests {
|
|
response := &pluginapi.ContainerAllocateResponse{}
|
|
for _, requestID := range req.DevicesIDs {
|
|
dev, ok := devs[requestID]
|
|
if !ok {
|
|
return nil, fmt.Errorf("invalid allocation request with non-existing device %s", requestID)
|
|
}
|
|
|
|
if dev.Health != pluginapi.Healthy {
|
|
return nil, fmt.Errorf("invalid allocation request with unhealthy device: %s", requestID)
|
|
}
|
|
|
|
// create fake device file
|
|
fpath := filepath.Join("/tmp", dev.ID)
|
|
|
|
// clean first
|
|
if err := os.RemoveAll(fpath); err != nil {
|
|
return nil, fmt.Errorf("failed to clean fake device file from previous run: %s", err)
|
|
}
|
|
|
|
f, err := os.Create(fpath)
|
|
if err != nil && !os.IsExist(err) {
|
|
return nil, fmt.Errorf("failed to create fake device file: %s", err)
|
|
}
|
|
|
|
f.Close()
|
|
|
|
response.Mounts = append(response.Mounts, &pluginapi.Mount{
|
|
ContainerPath: fpath,
|
|
HostPath: fpath,
|
|
})
|
|
}
|
|
responses.ContainerResponses = append(responses.ContainerResponses, response)
|
|
}
|
|
|
|
return &responses, nil
|
|
}
|
|
|
|
func main() {
|
|
devs := []*pluginapi.Device{
|
|
{ID: "Dev-1", Health: pluginapi.Healthy},
|
|
{ID: "Dev-2", Health: pluginapi.Healthy},
|
|
}
|
|
|
|
pluginSocksDir := os.Getenv("PLUGIN_SOCK_DIR")
|
|
klog.Infof("pluginSocksDir: %s", pluginSocksDir)
|
|
if pluginSocksDir == "" {
|
|
klog.Errorf("Empty pluginSocksDir")
|
|
return
|
|
}
|
|
|
|
socketPath := pluginSocksDir + "/dp." + fmt.Sprintf("%d", time.Now().Unix())
|
|
|
|
dp1 := plugin.NewDevicePluginStub(devs, socketPath, resourceName, false, false)
|
|
if err := dp1.Start(); err != nil {
|
|
panic(err)
|
|
|
|
}
|
|
dp1.SetAllocFunc(stubAllocFunc)
|
|
|
|
if registerControlFile := os.Getenv("REGISTER_CONTROL_FILE"); registerControlFile != "" {
|
|
if err := handleRegistrationProcess(registerControlFile); err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
if err := dp1.Register(pluginapi.KubeletSocket, resourceName, pluginapi.DevicePluginPath); err != nil {
|
|
panic(err)
|
|
}
|
|
select {}
|
|
}
|
|
|
|
func handleRegistrationProcess(registerControlFile string) error {
|
|
triggerPath := filepath.Dir(registerControlFile)
|
|
|
|
klog.InfoS("Registration process will be managed explicitly", "triggerPath", triggerPath, "triggerEntry", registerControlFile)
|
|
|
|
watcher, err := fsnotify.NewWatcher()
|
|
if err != nil {
|
|
klog.Errorf("Watcher creation failed: %v ", err)
|
|
return err
|
|
}
|
|
|
|
defer watcher.Close()
|
|
updateCh := make(chan bool)
|
|
defer close(updateCh)
|
|
|
|
go func() {
|
|
klog.Infof("Starting watching routine")
|
|
for {
|
|
select {
|
|
case event, ok := <-watcher.Events:
|
|
if !ok {
|
|
return
|
|
}
|
|
klog.InfoS("Received event", "name", event.Name, "operation", event.Op)
|
|
switch {
|
|
case event.Op&fsnotify.Remove == fsnotify.Remove:
|
|
if event.Name == registerControlFile {
|
|
klog.InfoS("Expected delete", "name", event.Name, "operation", event.Op)
|
|
updateCh <- true
|
|
return
|
|
}
|
|
klog.InfoS("Spurious delete", "name", event.Name, "operation", event.Op)
|
|
}
|
|
case err, ok := <-watcher.Errors:
|
|
if !ok {
|
|
return
|
|
}
|
|
klog.Errorf("error: %w", err)
|
|
panic(err)
|
|
}
|
|
}
|
|
}()
|
|
|
|
err = watcher.Add(triggerPath)
|
|
if err != nil {
|
|
klog.Errorf("Failed to add watch to %q: %w", triggerPath, err)
|
|
return err
|
|
}
|
|
|
|
klog.InfoS("Waiting for control file to be deleted", "path", registerControlFile)
|
|
<-updateCh
|
|
klog.InfoS("Control file was deleted, connecting!")
|
|
return nil
|
|
}
|