From 786c667e60e9ba337a83e378f816b89aa86f0e9f Mon Sep 17 00:00:00 2001
From: Francesco Giudici
Date: Tue, 25 Jan 2022 16:32:05 +0100
Subject: [PATCH] kata-monitor: increase delay before syncing with the container manager

When we detect a new kata sandbox from the sbs fs, we add that to the
sandbox cache to retrieve metrics.
We also schedule a sync with the container manager, which we consider
the source of truth: if the kata pod is not yet ready the container
manager will not report it and we will drop it from our cache.
We will add it back only when we re-sync, i.e., when we get an event
from the sbs fs (which means a kata pod has been terminated or a new one
has been started).

Since we use the sync with the container manager to remove pods from the
cache, we can wait some more before syncing (and so reduce the chance to
miss a kata pod just because it was not ready yet).

Let's raise the waiting time before starting the sync timer.

Fixes: #3550

Signed-off-by: Francesco Giudici
---
 src/runtime/pkg/kata-monitor/monitor.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/runtime/pkg/kata-monitor/monitor.go b/src/runtime/pkg/kata-monitor/monitor.go
index 6d0003af59..74777f2184 100644
--- a/src/runtime/pkg/kata-monitor/monitor.go
+++ b/src/runtime/pkg/kata-monitor/monitor.go
@@ -24,7 +24,7 @@ const (
 	RuntimeContainerd           = "containerd"
 	RuntimeCRIO                 = "cri-o"
 	fsMonitorRetryDelaySeconds  = 60
-	podCacheRefreshDelaySeconds = 5
+	podCacheRefreshDelaySeconds = 60
 )
 
 // SetLogger sets the logger for katamonitor package.
@@ -85,7 +85,7 @@ func (km *KataMonitor) startPodCacheUpdater() {
 		break
 	}
 	// we refresh the pod cache once if we get multiple add/delete pod events in a short time (< podCacheRefreshDelaySeconds)
-	cacheUpdateTimer := time.NewTimer(podCacheRefreshDelaySeconds * time.Second)
+	cacheUpdateTimer := time.NewTimer(5 * time.Second)
 	cacheUpdateTimerWasSet := false
 	for {
 		select {