Merge pull request #115107 from swatisehgal/handle-device-mgr-recovery-sample-dp-changes

node: device-mgr: sample device plugin: Add support to control registration process
This commit is contained in:
Kubernetes Prow Robot 2023-03-02 05:42:55 -08:00 committed by GitHub
commit 78e5db0931
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 120 additions and 3 deletions

View File

@ -22,7 +22,7 @@ import (
e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
)
//go:embed cluster-dns flexvolume guestbook kubectl sample-device-plugin.yaml scheduling/nvidia-driver-installer.yaml statefulset storage-csi
//go:embed cluster-dns flexvolume guestbook kubectl sample-device-plugin scheduling/nvidia-driver-installer.yaml statefulset storage-csi
var e2eTestingManifestsFS embed.FS
func GetE2ETestingManifestsFS() e2etestfiles.EmbeddedFileSource {

View File

@ -0,0 +1,52 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: sample-device-plugin-beta
namespace: kube-system
labels:
k8s-app: sample-device-plugin
spec:
selector:
matchLabels:
k8s-app: sample-device-plugin
template:
metadata:
labels:
k8s-app: sample-device-plugin
annotations:
spec:
priorityClassName: system-node-critical
tolerations:
- operator: "Exists"
effect: "NoExecute"
- operator: "Exists"
effect: "NoSchedule"
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: plugins-registry-probe-mode
hostPath:
path: /var/lib/kubelet/plugins_registry
- name: dev
hostPath:
path: /dev
containers:
- image: registry.k8s.io/e2e-test-images/sample-device-plugin:1.5
name: sample-device-plugin
env:
- name: PLUGIN_SOCK_DIR
value: "/var/lib/kubelet/device-plugins"
- name: REGISTER_CONTROL_FILE
value: "/var/lib/kubelet/device-plugins/sample/registration"
securityContext:
privileged: true
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- name: plugins-registry-probe-mode
mountPath: /var/lib/kubelet/plugins_registry
- name: dev
mountPath: /dev
updateStrategy:
type: RollingUpdate

View File

@ -22,7 +22,8 @@ import (
const (
// SampleDevicePluginDSYAML is the path of the daemonset template of the sample device plugin. // TODO: Parametrize it by making it a feature in TestFramework.
SampleDevicePluginDSYAML = "test/e2e/testing-manifests/sample-device-plugin.yaml"
SampleDevicePluginDSYAML = "test/e2e/testing-manifests/sample-device-plugin.yaml"
SampleDevicePluginControlRegistrationDSYAML = "test/e2e/testing-manifests/sample-device-plugin/sample-device-plugin-control-registration.yaml"
// SampleDevicePluginName is the name of the device plugin pod
SampleDevicePluginName = "sample-device-plugin"

View File

@ -1 +1 @@
1.4
1.5

View File

@ -22,6 +22,7 @@ import (
"path/filepath"
"time"
"github.com/fsnotify/fsnotify"
"k8s.io/klog/v2"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
@ -84,6 +85,7 @@ func main() {
klog.Errorf("Empty pluginSocksDir")
return
}
socketPath := pluginSocksDir + "/dp." + fmt.Sprintf("%d", time.Now().Unix())
dp1 := plugin.NewDevicePluginStub(devs, socketPath, resourceName, false, false)
@ -92,8 +94,70 @@ func main() {
}
dp1.SetAllocFunc(stubAllocFunc)
if registerControlFile := os.Getenv("REGISTER_CONTROL_FILE"); registerControlFile != "" {
if err := handleRegistrationProcess(registerControlFile); err != nil {
panic(err)
}
}
if err := dp1.Register(pluginapi.KubeletSocket, resourceName, pluginapi.DevicePluginPath); err != nil {
panic(err)
}
select {}
}
func handleRegistrationProcess(registerControlFile string) error {
triggerPath := filepath.Dir(registerControlFile)
klog.InfoS("Registration process will be managed explicitly", "triggerPath", triggerPath, "triggerEntry", registerControlFile)
watcher, err := fsnotify.NewWatcher()
if err != nil {
klog.Errorf("Watcher creation failed: %v ", err)
return err
}
defer watcher.Close()
updateCh := make(chan bool)
defer close(updateCh)
go func() {
klog.Infof("Starting watching routine")
for {
select {
case event, ok := <-watcher.Events:
if !ok {
return
}
klog.InfoS("Received event", "name", event.Name, "operation", event.Op)
switch {
case event.Op&fsnotify.Remove == fsnotify.Remove:
if event.Name == registerControlFile {
klog.InfoS("Expected delete", "name", event.Name, "operation", event.Op)
updateCh <- true
return
}
klog.InfoS("Spurious delete", "name", event.Name, "operation", event.Op)
}
case err, ok := <-watcher.Errors:
if !ok {
return
}
klog.Errorf("error: %w", err)
panic(err)
}
}
}()
err = watcher.Add(triggerPath)
if err != nil {
klog.Errorf("Failed to add watch to %q: %w", triggerPath, err)
return err
}
klog.InfoS("Waiting for control file to be deleted", "path", registerControlFile)
<-updateCh
klog.InfoS("Control file was deleted, connecting!")
return nil
}