mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 09:52:49 +00:00
Merge pull request #115107 from swatisehgal/handle-device-mgr-recovery-sample-dp-changes
node: device-mgr: sample device plugin: Add support to control registration process
This commit is contained in:
commit
78e5db0931
@ -22,7 +22,7 @@ import (
|
||||
e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
|
||||
)
|
||||
|
||||
//go:embed cluster-dns flexvolume guestbook kubectl sample-device-plugin.yaml scheduling/nvidia-driver-installer.yaml statefulset storage-csi
|
||||
//go:embed cluster-dns flexvolume guestbook kubectl sample-device-plugin scheduling/nvidia-driver-installer.yaml statefulset storage-csi
|
||||
var e2eTestingManifestsFS embed.FS
|
||||
|
||||
func GetE2ETestingManifestsFS() e2etestfiles.EmbeddedFileSource {
|
||||
|
@ -0,0 +1,52 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: sample-device-plugin-beta
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: sample-device-plugin
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: sample-device-plugin
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: sample-device-plugin
|
||||
annotations:
|
||||
spec:
|
||||
priorityClassName: system-node-critical
|
||||
tolerations:
|
||||
- operator: "Exists"
|
||||
effect: "NoExecute"
|
||||
- operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
volumes:
|
||||
- name: device-plugin
|
||||
hostPath:
|
||||
path: /var/lib/kubelet/device-plugins
|
||||
- name: plugins-registry-probe-mode
|
||||
hostPath:
|
||||
path: /var/lib/kubelet/plugins_registry
|
||||
- name: dev
|
||||
hostPath:
|
||||
path: /dev
|
||||
containers:
|
||||
- image: registry.k8s.io/e2e-test-images/sample-device-plugin:1.5
|
||||
name: sample-device-plugin
|
||||
env:
|
||||
- name: PLUGIN_SOCK_DIR
|
||||
value: "/var/lib/kubelet/device-plugins"
|
||||
- name: REGISTER_CONTROL_FILE
|
||||
value: "/var/lib/kubelet/device-plugins/sample/registration"
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts:
|
||||
- name: device-plugin
|
||||
mountPath: /var/lib/kubelet/device-plugins
|
||||
- name: plugins-registry-probe-mode
|
||||
mountPath: /var/lib/kubelet/plugins_registry
|
||||
- name: dev
|
||||
mountPath: /dev
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
@ -22,7 +22,8 @@ import (
|
||||
|
||||
const (
|
||||
// SampleDevicePluginDSYAML is the path of the daemonset template of the sample device plugin. // TODO: Parametrize it by making it a feature in TestFramework.
|
||||
SampleDevicePluginDSYAML = "test/e2e/testing-manifests/sample-device-plugin.yaml"
|
||||
SampleDevicePluginDSYAML = "test/e2e/testing-manifests/sample-device-plugin.yaml"
|
||||
SampleDevicePluginControlRegistrationDSYAML = "test/e2e/testing-manifests/sample-device-plugin/sample-device-plugin-control-registration.yaml"
|
||||
|
||||
// SampleDevicePluginName is the name of the device plugin pod
|
||||
SampleDevicePluginName = "sample-device-plugin"
|
||||
|
@ -1 +1 @@
|
||||
1.4
|
||||
1.5
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"k8s.io/klog/v2"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
|
||||
@ -84,6 +85,7 @@ func main() {
|
||||
klog.Errorf("Empty pluginSocksDir")
|
||||
return
|
||||
}
|
||||
|
||||
socketPath := pluginSocksDir + "/dp." + fmt.Sprintf("%d", time.Now().Unix())
|
||||
|
||||
dp1 := plugin.NewDevicePluginStub(devs, socketPath, resourceName, false, false)
|
||||
@ -92,8 +94,70 @@ func main() {
|
||||
|
||||
}
|
||||
dp1.SetAllocFunc(stubAllocFunc)
|
||||
|
||||
if registerControlFile := os.Getenv("REGISTER_CONTROL_FILE"); registerControlFile != "" {
|
||||
if err := handleRegistrationProcess(registerControlFile); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := dp1.Register(pluginapi.KubeletSocket, resourceName, pluginapi.DevicePluginPath); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
select {}
|
||||
}
|
||||
|
||||
func handleRegistrationProcess(registerControlFile string) error {
|
||||
triggerPath := filepath.Dir(registerControlFile)
|
||||
|
||||
klog.InfoS("Registration process will be managed explicitly", "triggerPath", triggerPath, "triggerEntry", registerControlFile)
|
||||
|
||||
watcher, err := fsnotify.NewWatcher()
|
||||
if err != nil {
|
||||
klog.Errorf("Watcher creation failed: %v ", err)
|
||||
return err
|
||||
}
|
||||
|
||||
defer watcher.Close()
|
||||
updateCh := make(chan bool)
|
||||
defer close(updateCh)
|
||||
|
||||
go func() {
|
||||
klog.Infof("Starting watching routine")
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-watcher.Events:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
klog.InfoS("Received event", "name", event.Name, "operation", event.Op)
|
||||
switch {
|
||||
case event.Op&fsnotify.Remove == fsnotify.Remove:
|
||||
if event.Name == registerControlFile {
|
||||
klog.InfoS("Expected delete", "name", event.Name, "operation", event.Op)
|
||||
updateCh <- true
|
||||
return
|
||||
}
|
||||
klog.InfoS("Spurious delete", "name", event.Name, "operation", event.Op)
|
||||
}
|
||||
case err, ok := <-watcher.Errors:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
klog.Errorf("error: %w", err)
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
err = watcher.Add(triggerPath)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to add watch to %q: %w", triggerPath, err)
|
||||
return err
|
||||
}
|
||||
|
||||
klog.InfoS("Waiting for control file to be deleted", "path", registerControlFile)
|
||||
<-updateCh
|
||||
klog.InfoS("Control file was deleted, connecting!")
|
||||
return nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user