mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-14 14:23:37 +00:00
node: sample-dp: Add retry to handle device plugin restart failure
Add retry mechanism to handle cases where after kubelet restarts, the device plugin unix socket(s) were created but not ready to serve yet. Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
parent
d0d133298d
commit
9a354fc9d0
@ -217,11 +217,22 @@ func (m *Stub) Watch(kubeletEndpoint, resourceName, pluginSockDir string) {
|
||||
case event := <-m.kubeletRestartWatcher.Events:
|
||||
if event.Name == kubeletEndpoint && event.Op&fsnotify.Create == fsnotify.Create {
|
||||
klog.InfoS("inotify: file created, restarting", "kubeletEndpoint", kubeletEndpoint)
|
||||
if err := m.Restart(); err != nil {
|
||||
klog.ErrorS(err, "Unable to restart server")
|
||||
panic(err)
|
||||
var lastErr error
|
||||
|
||||
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 2*time.Minute, false, func(context.Context) (done bool, err error) {
|
||||
restartErr := m.Restart()
|
||||
if restartErr == nil {
|
||||
return true, nil
|
||||
}
|
||||
klog.ErrorS(restartErr, "Retrying after error")
|
||||
lastErr = restartErr
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Unable to restart server: wait timed out", "lastErr", lastErr.Error())
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if ok := m.registerControlFunc(); ok {
|
||||
if err := m.Register(kubeletEndpoint, resourceName, pluginSockDir); err != nil {
|
||||
klog.ErrorS(err, "Unable to register to kubelet")
|
||||
|
Loading…
Reference in New Issue
Block a user