mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-02 18:13:57 +00:00
runtime: kill container on network timeout, address review nitpicks
- Kill container via SIGKILL when RescanNetwork times out instead of silently continuing without networking - Remove unused networkReady channel - Fix import ordering, structured logging, log levels - Remove double-logging on timeout path - Add rollback comment and interface doc comment - Use logrus.Fields and plain const consistently Signed-off-by: llink5 <llink5@users.noreply.github.com>
This commit is contained in:
@@ -8,11 +8,13 @@ package containerdshim
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"syscall"
|
||||
|
||||
"github.com/containerd/containerd/api/types/task"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||
vcutils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
)
|
||||
|
||||
func startContainer(ctx context.Context, s *service, c *container) (retErr error) {
|
||||
@@ -50,11 +52,21 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error
|
||||
// Run the network rescan asynchronously so we don't block
|
||||
// the Start RPC — Docker won't call allocateNetwork until
|
||||
// it receives the StartResponse.
|
||||
go func() {
|
||||
if err := s.sandbox.RescanNetwork(s.ctx); err != nil {
|
||||
shimLog.WithError(err).Warn("async network rescan failed")
|
||||
}
|
||||
}()
|
||||
if c.spec != nil && vcutils.IsDockerContainer(c.spec) {
|
||||
go func() {
|
||||
if err := s.sandbox.RescanNetwork(s.ctx); err != nil {
|
||||
shimLog.WithError(err).WithFields(logrus.Fields{
|
||||
"sandbox": s.sandbox.ID(),
|
||||
"container": c.id,
|
||||
}).Error("Docker 26+ network setup failed: no interfaces discovered after timeout. " +
|
||||
"Container killed to prevent silent networking failure. " +
|
||||
"Check Docker daemon logs and network configuration.")
|
||||
if sigErr := s.sandbox.SignalProcess(s.ctx, c.id, c.id, syscall.SIGKILL, true); sigErr != nil {
|
||||
shimLog.WithError(sigErr).Error("failed to kill container after network setup failure")
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// We use s.ctx(`ctx` derived from `s.ctx`) to check for cancellation of the
|
||||
// shim context and the context passed to startContainer for tracing.
|
||||
|
||||
@@ -72,6 +72,7 @@ type VCSandbox interface {
|
||||
|
||||
GetOOMEvent(ctx context.Context) (string, error)
|
||||
GetHypervisorPid() (int, error)
|
||||
// RescanNetwork re-scans the network namespace for late-discovered endpoints.
|
||||
RescanNetwork(ctx context.Context) error
|
||||
|
||||
UpdateRuntimeMetrics() error
|
||||
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/containernetworking/plugins/pkg/ns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
"github.com/vishvananda/netns"
|
||||
otelTrace "go.opentelemetry.io/otel/trace"
|
||||
@@ -347,10 +348,10 @@ func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug
|
||||
// hypervisor is running in a different namespace and retry there.
|
||||
if len(endpoints) == 0 && s != nil {
|
||||
if hypervisorNs, ok := n.detectHypervisorNetns(s); ok {
|
||||
networkLogger().WithFields(map[string]interface{}{
|
||||
networkLogger().WithFields(logrus.Fields{
|
||||
"original_netns": n.netNSPath,
|
||||
"hypervisor_netns": hypervisorNs,
|
||||
}).Info("no endpoints in original netns, switching to hypervisor netns")
|
||||
}).Debug("no endpoints in original netns, switching to hypervisor netns")
|
||||
|
||||
origPath := n.netNSPath
|
||||
origCreated := n.netNSCreated
|
||||
@@ -414,6 +415,7 @@ func (n *LinuxNetwork) scanEndpointsInNs(ctx context.Context, s *Sandbox, nsPath
|
||||
for _, link := range linkList {
|
||||
netInfo, err := networkInfoFromLink(netlinkHandle, link)
|
||||
if err != nil {
|
||||
// No rollback needed — no endpoints were added in this iteration yet.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -685,7 +687,7 @@ func (n *LinuxNetwork) RemoveEndpoints(ctx context.Context, s *Sandbox, endpoint
|
||||
if delErr := deleteNetNS(n.placeholderNetNS); delErr != nil {
|
||||
networkLogger().WithField("netns", n.placeholderNetNS).WithError(delErr).Warn("failed to delete placeholder netns during teardown")
|
||||
} else {
|
||||
networkLogger().Infof("Placeholder network namespace %q deleted", n.placeholderNetNS)
|
||||
networkLogger().WithField("netns", n.placeholderNetNS).Info("placeholder network namespace deleted")
|
||||
n.placeholderNetNS = ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -357,7 +357,7 @@ func (s *Sandbox) RescanNetwork(ctx context.Context) error {
|
||||
ticker := time.NewTicker(pollInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
s.Logger().Info("waiting for network interfaces in namespace")
|
||||
s.Logger().Debug("waiting for network interfaces in namespace")
|
||||
|
||||
for {
|
||||
if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil {
|
||||
@@ -371,8 +371,7 @@ func (s *Sandbox) RescanNetwork(ctx context.Context) error {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-deadline.C:
|
||||
s.Logger().Warn("no network interfaces found after timeout")
|
||||
return nil
|
||||
return fmt.Errorf("no network interfaces discovered after %s timeout", maxWait)
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
|
||||
@@ -494,11 +494,9 @@ func RevertBytes(num uint64) uint64 {
|
||||
return 1024*RevertBytes(a) + b
|
||||
}
|
||||
|
||||
const (
|
||||
// dockerLibnetworkSetkey is the hook argument that identifies Docker's
|
||||
// network configuration hook. The argument following it is the sandbox ID.
|
||||
dockerLibnetworkSetkey = "libnetwork-setkey"
|
||||
)
|
||||
// dockerLibnetworkSetkey is the hook argument that identifies Docker's
|
||||
// network configuration hook. The argument following it is the sandbox ID.
|
||||
const dockerLibnetworkSetkey = "libnetwork-setkey"
|
||||
|
||||
// dockerNetnsPrefixes are the well-known filesystem paths where the Docker
|
||||
// daemon bind-mounts container network namespaces.
|
||||
|
||||
@@ -579,6 +579,9 @@ func TestRevertBytes(t *testing.T) {
|
||||
assert.Equal(expectedNum, num)
|
||||
}
|
||||
|
||||
// TestIsDockerContainer validates hook-detection logic in isolation.
|
||||
// End-to-end Docker→containerd→kata integration is covered by
|
||||
// external tests (see tests/integration/kubernetes/).
|
||||
func TestIsDockerContainer(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
@@ -625,6 +628,9 @@ func TestIsDockerContainer(t *testing.T) {
|
||||
assert.False(IsDockerContainer(ociSpec3))
|
||||
}
|
||||
|
||||
// TestDockerNetnsPath validates netns path discovery from OCI hook args.
|
||||
// This does not test the actual namespace opening or endpoint scanning;
|
||||
// see integration tests for full-path coverage.
|
||||
func TestDockerNetnsPath(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user