runtime: use symptom-based rescan instead of runtime detection

Modern container runtimes (Docker 29+) no longer advertise their
identity through OCI hooks or annotations. Rather than attempting
fragile runtime detection, check for the symptom: no network
endpoints after sandbox creation.

- Remove IsDockerContainer guard from RescanNetwork goroutine
- Remove container kill on timeout (too aggressive without reliable
  runtime detection, breaks CNI on slow architectures)
- Restore original startVM endpoint scan condition (fixes CNI
  regression on s390x)
- RescanNetwork returns nil on timeout with warning instead of error

Signed-off-by: llink5 <llink5@users.noreply.github.com>
This commit is contained in:
llink5
2026-04-01 11:53:51 +00:00
committed by Fabiano Fidêncio
parent d13bd3f7eb
commit 5ae8a608df
2 changed files with 15 additions and 26 deletions

View File

@@ -8,13 +8,11 @@ package containerdshim
import (
"context"
"fmt"
"syscall"
"github.com/containerd/containerd/api/types/task"
"github.com/sirupsen/logrus"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vcutils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
)
func startContainer(ctx context.Context, s *service, c *container) (retErr error) {
@@ -48,25 +46,18 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error
}
go watchSandbox(ctx, s)
// Docker 26+ configures networking after the Start response.
// Run the network rescan asynchronously so we don't block
// the Start RPC — Docker won't call allocateNetwork until
// it receives the StartResponse.
if c.spec != nil && vcutils.IsDockerContainer(c.spec) {
go func() {
if err := s.sandbox.RescanNetwork(s.ctx); err != nil {
shimLog.WithError(err).WithFields(logrus.Fields{
"sandbox": s.sandbox.ID(),
"container": c.id,
}).Error("Docker 26+ network setup failed: no interfaces discovered after timeout. " +
"Container killed to prevent silent networking failure. " +
"Check Docker daemon logs and network configuration.")
if sigErr := s.sandbox.SignalProcess(s.ctx, c.id, c.id, syscall.SIGKILL, true); sigErr != nil {
shimLog.WithError(sigErr).Error("failed to kill container after network setup failure")
}
}
}()
}
// Always schedule an async network rescan; the check for missing
// endpoints is made inside RescanNetwork rather than here. This
// handles runtimes that configure networking after task creation
// (e.g. Docker 26+ configures networking after the Start response,
// and prestart hooks may not have run yet on slower architectures).
// RescanNetwork is idempotent — it returns immediately if
// endpoints already exist.
go func() {
if err := s.sandbox.RescanNetwork(s.ctx); err != nil {
shimLog.WithError(err).Error("async network rescan failed — container may lack networking")
}
}()
// We use s.ctx (`ctx` is derived from `s.ctx`) to check for cancellation of the
// shim context and the context passed to startContainer for tracing.

View File

@@ -371,7 +371,8 @@ func (s *Sandbox) RescanNetwork(ctx context.Context) error {
case <-ctx.Done():
return ctx.Err()
case <-deadline.C:
return fmt.Errorf("no network interfaces discovered after %s timeout", maxWait)
s.Logger().Warn("no network interfaces found after timeout — networking may be configured by prestart hooks")
return nil
case <-ticker.C:
}
}
@@ -1571,12 +1572,9 @@ func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Con
// 3. In case of vm factory, scan the netns to hotplug interfaces after vm is started.
// 4. In case of prestartHookFunc, network config might have been changed. We need to
// rescan and handle the change.
// 5. If no endpoints were found pre-VM-start (e.g. Docker 26+ placed the
// hypervisor in its own pre-configured netns), rescan now that the
// hypervisor is running so addAllEndpoints can discover its namespace.
if !s.config.NetworkConfig.DisableNewNetwork &&
caps.IsNetworkDeviceHotplugSupported() &&
(s.factory != nil || prestartHookFunc != nil || len(s.network.Endpoints()) == 0) {
(s.factory != nil || prestartHookFunc != nil) {
if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil {
return err
}