diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml index 79c938fb1e..29acac0c31 100644 --- a/.github/workflows/basic-ci-amd64.yaml +++ b/.github/workflows/basic-ci-amd64.yaml @@ -269,6 +269,50 @@ jobs: timeout-minutes: 15 run: bash tests/functional/vfio/gha-run.sh run + run-docker-tests: + name: run-docker-tests + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. + fail-fast: false + matrix: + vmm: + - qemu + runs-on: ubuntu-22.04 + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + persist-credentials: false + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/docker/gha-run.sh install-dependencies + env: + GH_TOKEN: ${{ github.token }} + + - name: get-kata-tarball + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts + + - name: Run docker smoke test + timeout-minutes: 5 + run: bash tests/integration/docker/gha-run.sh run + run-nerdctl-tests: name: run-nerdctl-tests strategy: diff --git a/.github/workflows/basic-ci-s390x.yaml b/.github/workflows/basic-ci-s390x.yaml index 7d36202ee3..bb45665e0d 100644 --- a/.github/workflows/basic-ci-s390x.yaml +++ b/.github/workflows/basic-ci-s390x.yaml @@ -123,3 +123,44 @@ jobs: - name: Run containerd-stability tests timeout-minutes: 15 run: bash tests/stability/gha-run.sh run + + run-docker-tests: + name: run-docker-tests + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. + fail-fast: false + matrix: + vmm: ['qemu'] + runs-on: s390x-large + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + persist-credentials: false + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/docker/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: kata-static-tarball-s390x${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts + + - name: Run docker smoke test + timeout-minutes: 5 + run: bash tests/integration/docker/gha-run.sh run diff --git a/src/runtime/pkg/containerd-shim-v2/start.go b/src/runtime/pkg/containerd-shim-v2/start.go index b4f023ab29..f0d04e1cbe 100644 --- a/src/runtime/pkg/containerd-shim-v2/start.go +++ b/src/runtime/pkg/containerd-shim-v2/start.go @@ -9,9 +9,9 @@ import ( "context" "fmt" + "github.com/containerd/containerd/api/types/task" "github.com/sirupsen/logrus" - "github.com/containerd/containerd/api/types/task" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" ) @@ -46,6 +46,19 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error } go watchSandbox(ctx, s) + // If no network endpoints were discovered during sandbox creation, + // schedule an async rescan. This handles runtimes that configure + // networking after task creation (e.g. Docker 26+ configures + // networking after the Start response, and prestart hooks may + // not have run yet on slower architectures). + // RescanNetwork is idempotent — it returns immediately if + // endpoints already exist. + go func() { + if err := s.sandbox.RescanNetwork(s.ctx); err != nil { + shimLog.WithError(err).Error("async network rescan failed — container may lack networking") + } + }() + // We use s.ctx(`ctx` derived from `s.ctx`) to check for cancellation of the // shim context and the context passed to startContainer for tracing. go watchOOMEvents(ctx, s) diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index b377674e01..b60b0bee14 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -19,6 +19,7 @@ import ( vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -140,6 +141,17 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo sandboxConfig.Containers[0].RootFs = rootFs } + // Docker 26+ may set up networking before task creation instead of using + // prestart hooks. The netns path is not in the OCI spec but can be + // discovered from Docker's libnetwork hook args which contain the sandbox + // ID that maps to /var/run/docker/netns/. + if sandboxConfig.NetworkConfig.NetworkID == "" && !sandboxConfig.NetworkConfig.DisableNewNetwork { + if dockerNetns := utils.DockerNetnsPath(&ociSpec); dockerNetns != "" { + sandboxConfig.NetworkConfig.NetworkID = dockerNetns + kataUtilsLogger.WithField("netns", dockerNetns).Info("discovered Docker network namespace from hook args") + } + } + // Important to create the network namespace before the sandbox is // created, because it is not responsible for the creation of the // netns if it does not exist. diff --git a/src/runtime/pkg/katautils/network_linux.go b/src/runtime/pkg/katautils/network_linux.go index f2691330cf..327c17cae0 100644 --- a/src/runtime/pkg/katautils/network_linux.go +++ b/src/runtime/pkg/katautils/network_linux.go @@ -20,7 +20,9 @@ import ( "golang.org/x/sys/unix" ) -const procMountInfoFile = "/proc/self/mountinfo" +const ( + procMountInfoFile = "/proc/self/mountinfo" +) // EnterNetNS is free from any call to a go routine, and it calls // into runtime.LockOSThread(), meaning it won't be executed in a @@ -29,27 +31,30 @@ func EnterNetNS(networkID string, cb func() error) error { return vc.EnterNetNS(networkID, cb) } -// SetupNetworkNamespace create a network namespace +// SetupNetworkNamespace creates a network namespace if one is not already +// provided via NetworkID. When NetworkID is empty and networking is not +// disabled, a new namespace is created as a placeholder; the actual +// hypervisor namespace will be discovered later by addAllEndpoints after +// the VM has started. func SetupNetworkNamespace(config *vc.NetworkConfig) error { if config.DisableNewNetwork { kataUtilsLogger.Info("DisableNewNetNs is on, shim and hypervisor are running in the host netns") return nil } - var err error - var n ns.NetNS - if config.NetworkID == "" { + var ( + err error + n ns.NetNS + ) + if rootless.IsRootless() { n, err = rootless.NewNS() - if err != nil { - return err - } } else { n, err = testutils.NewNS() - if err != nil { - return err - } + } + if err != nil { + return err } config.NetworkID = n.Path() @@ -71,11 +76,23 @@ func SetupNetworkNamespace(config *vc.NetworkConfig) error { } const ( - netNsMountType = "nsfs" - mountTypeFieldIdx = 8 - mountDestIdx = 4 + netNsMountType = "nsfs" + mountDestIdx = 4 ) +// mountinfoFsType finds the filesystem type in a parsed mountinfo line. +// The mountinfo format has optional tagged fields (shared:, master:, etc.) +// between field 7 and a "-" separator. The fs type is the field immediately +// after "-". Returns "" if the separator is not found. +func mountinfoFsType(fields []string) string { + for i, f := range fields { + if f == "-" && i+1 < len(fields) { + return fields[i+1] + } + } + return "" +} + // getNetNsFromBindMount returns the network namespace for the bind-mounted path func getNetNsFromBindMount(nsPath string, procMountFile string) (string, error) { // Resolve all symlinks in the path as the mountinfo file contains @@ -100,16 +117,15 @@ func getNetNsFromBindMount(nsPath string, procMountFile string) (string, error) // "711 26 0:3 net:[4026532009] /run/docker/netns/default rw shared:535 - nsfs nsfs rw" // // Reference: https://www.kernel.org/doc/Documentation/filesystems/proc.txt - - // We are interested in the first 9 fields of this file, - // to check for the correct mount type. + // The "-" separator has a variable position due to optional tagged + // fields, so we locate the fs type dynamically. fields := strings.Split(text, " ") if len(fields) < 9 { continue } // We check here if the mount type is a network namespace mount type, namely "nsfs" - if fields[mountTypeFieldIdx] != netNsMountType { + if mountinfoFsType(fields) != netNsMountType { continue } diff --git a/src/runtime/pkg/katautils/network_test.go b/src/runtime/pkg/katautils/network_test.go index 8d1bf86eef..95429a0e64 100644 --- a/src/runtime/pkg/katautils/network_test.go +++ b/src/runtime/pkg/katautils/network_test.go @@ -149,3 +149,23 @@ func TestSetupNetworkNamespace(t *testing.T) { err = SetupNetworkNamespace(config) assert.NoError(err) } + +func TestMountinfoFsType(t *testing.T) { + assert := assert.New(t) + + // Standard mountinfo line with optional tagged fields + fields := []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw", "shared:535", "-", "nsfs", "nsfs", "rw"} + assert.Equal("nsfs", mountinfoFsType(fields)) + + // Multiple optional tags before separator + fields = []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw", "shared:535", "master:1", "-", "nsfs", "nsfs", "rw"} + assert.Equal("nsfs", mountinfoFsType(fields)) + + // No separator + fields = []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw"} + assert.Equal("", mountinfoFsType(fields)) + + // Separator at end (malformed) + fields = []string{"711", "26", "-"} + assert.Equal("", mountinfoFsType(fields)) +} diff --git a/src/runtime/virtcontainers/interfaces.go b/src/runtime/virtcontainers/interfaces.go index 742156a6b5..21dd40d25e 100644 --- a/src/runtime/virtcontainers/interfaces.go +++ b/src/runtime/virtcontainers/interfaces.go @@ -72,6 +72,8 @@ type VCSandbox interface { GetOOMEvent(ctx context.Context) (string, error) GetHypervisorPid() (int, error) + // RescanNetwork re-scans the network namespace for late-discovered endpoints. + RescanNetwork(ctx context.Context) error UpdateRuntimeMetrics() error GetAgentMetrics(ctx context.Context) (string, error) diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go index ab640c65a9..a9034b20eb 100644 --- a/src/runtime/virtcontainers/network_linux.go +++ b/src/runtime/virtcontainers/network_linux.go @@ -17,9 +17,11 @@ import ( "runtime" "sort" "strconv" + "strings" "time" "github.com/containernetworking/plugins/pkg/ns" + "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" "github.com/vishvananda/netns" otelTrace "go.opentelemetry.io/otel/trace" @@ -45,6 +47,11 @@ type LinuxNetwork struct { interworkingModel NetInterworkingModel netNSCreated bool danConfigPath string + // placeholderNetNS holds the path to a placeholder network namespace + // that we created but later abandoned in favour of the hypervisor's + // netns. If best-effort deletion in addAllEndpoints fails, teardown + // retries the cleanup via RemoveEndpoints. + placeholderNetNS string } // NewNetwork creates a new Linux Network from a NetworkConfig. @@ -68,11 +75,11 @@ func NewNetwork(configs ...*NetworkConfig) (Network, error) { } return &LinuxNetwork{ - config.NetworkID, - []Endpoint{}, - config.InterworkingModel, - config.NetworkCreated, - config.DanConfigPath, + netNSPath: config.NetworkID, + eps: []Endpoint{}, + interworkingModel: config.InterworkingModel, + netNSCreated: config.NetworkCreated, + danConfigPath: config.DanConfigPath, }, nil } @@ -325,28 +332,91 @@ func (n *LinuxNetwork) GetEndpointsNum() (int, error) { // Scan the networking namespace through netlink and then: // 1. Create the endpoints for the relevant interfaces found there. // 2. Attach them to the VM. +// +// If no usable interfaces are found and the hypervisor is running in a +// different network namespace (e.g. Docker 26+ places QEMU in its own +// pre-configured namespace), switch to the hypervisor's namespace and +// rescan there. This handles the case where the OCI spec does not +// communicate the network namespace path. func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug bool) error { - netnsHandle, err := netns.GetFromPath(n.netNSPath) + endpoints, err := n.scanEndpointsInNs(ctx, s, n.netNSPath, hotplug) if err != nil { return err } + + // If the scan found no usable endpoints, check whether the + // hypervisor is running in a different namespace and retry there. + if len(endpoints) == 0 && s != nil { + if hypervisorNs, ok := n.detectHypervisorNetns(s); ok { + networkLogger().WithFields(logrus.Fields{ + "original_netns": n.netNSPath, + "hypervisor_netns": hypervisorNs, + }).Debug("no endpoints in original netns, switching to hypervisor netns") + + origPath := n.netNSPath + origCreated := n.netNSCreated + n.netNSPath = hypervisorNs + + _, err = n.scanEndpointsInNs(ctx, s, n.netNSPath, hotplug) + if err != nil { + n.netNSPath = origPath + n.netNSCreated = origCreated + return err + } + + // Clean up the placeholder namespace we created — we're now + // using the hypervisor's namespace and the placeholder is empty. + // Only clear netNSCreated once deletion succeeds; on failure, + // stash the path so RemoveEndpoints can retry during teardown. + if origCreated { + if delErr := deleteNetNS(origPath); delErr != nil { + networkLogger().WithField("netns", origPath).WithError(delErr).Warn("failed to delete placeholder netns, will retry during teardown") + n.placeholderNetNS = origPath + } + } + // The hypervisor's namespace was not created by us. + n.netNSCreated = false + } + } + + sort.Slice(n.eps, func(i, j int) bool { + return n.eps[i].Name() < n.eps[j].Name() + }) + + networkLogger().WithField("endpoints", n.eps).Info("endpoints found after scan") + + return nil +} + +// scanEndpointsInNs scans a network namespace for usable (non-loopback, +// configured) interfaces and adds them as endpoints. Returns the list of +// newly added endpoints. +func (n *LinuxNetwork) scanEndpointsInNs(ctx context.Context, s *Sandbox, nsPath string, hotplug bool) ([]Endpoint, error) { + netnsHandle, err := netns.GetFromPath(nsPath) + if err != nil { + return nil, err + } defer netnsHandle.Close() netlinkHandle, err := netlink.NewHandleAt(netnsHandle) if err != nil { - return err + return nil, err } defer netlinkHandle.Close() linkList, err := netlinkHandle.LinkList() if err != nil { - return err + return nil, err } + epsBefore := len(n.eps) + var added []Endpoint + for _, link := range linkList { netInfo, err := networkInfoFromLink(netlinkHandle, link) if err != nil { - return err + // No rollback needed — no endpoints were added in this iteration yet. + return nil, err } // Ignore unconfigured network interfaces. These are @@ -368,22 +438,62 @@ func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug continue } - if err := doNetNS(n.netNSPath, func(_ ns.NetNS) error { - _, err = n.addSingleEndpoint(ctx, s, netInfo, hotplug) - return err + if err := doNetNS(nsPath, func(_ ns.NetNS) error { + ep, addErr := n.addSingleEndpoint(ctx, s, netInfo, hotplug) + if addErr == nil { + added = append(added, ep) + } + return addErr }); err != nil { - return err + // Rollback: remove any endpoints added during this scan + // so that a failed scan does not leave partial side effects. + n.eps = n.eps[:epsBefore] + return nil, err } - } - sort.Slice(n.eps, func(i, j int) bool { - return n.eps[i].Name() < n.eps[j].Name() - }) + return added, nil +} - networkLogger().WithField("endpoints", n.eps).Info("endpoints found after scan") +// detectHypervisorNetns checks whether the hypervisor process is running in a +// network namespace different from the one we are currently tracking. If so it +// returns the procfs path to the hypervisor's netns and true. +func (n *LinuxNetwork) detectHypervisorNetns(s *Sandbox) (string, bool) { + pid, err := s.GetHypervisorPid() + if err != nil || pid <= 0 { + return "", false + } - return nil + // Guard against PID recycling: verify the process belongs to this + // sandbox by checking its command line for the sandbox ID. QEMU is + // started with -name sandbox-, so the ID will appear in cmdline. + // /proc/pid/cmdline uses null bytes as argument separators; replace + // them so the substring search works on the joined argument string. + cmdlineRaw, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)) + if err != nil { + return "", false + } + cmdline := strings.ReplaceAll(string(cmdlineRaw), "\x00", " ") + if !strings.Contains(cmdline, s.id) { + return "", false + } + + hypervisorNs := fmt.Sprintf("/proc/%d/ns/net", pid) + + // Compare device and inode numbers. Inode numbers are only unique + // within a device, so both must match to confirm the same namespace. + var currentStat, hvStat unix.Stat_t + if err := unix.Stat(n.netNSPath, ¤tStat); err != nil { + return "", false + } + if err := unix.Stat(hypervisorNs, &hvStat); err != nil { + return "", false + } + + if currentStat.Dev != hvStat.Dev || currentStat.Ino != hvStat.Ino { + return hypervisorNs, true + } + return "", false } func convertDanDeviceToNetworkInfo(device *vctypes.DanDevice) (*NetworkInfo, error) { @@ -571,6 +681,17 @@ func (n *LinuxNetwork) RemoveEndpoints(ctx context.Context, s *Sandbox, endpoint return deleteNetNS(n.netNSPath) } + // Retry cleanup of a placeholder namespace whose earlier deletion + // failed in addAllEndpoints. + if n.placeholderNetNS != "" && endpoints == nil { + if delErr := deleteNetNS(n.placeholderNetNS); delErr != nil { + networkLogger().WithField("netns", n.placeholderNetNS).WithError(delErr).Warn("failed to delete placeholder netns during teardown") + } else { + networkLogger().WithField("netns", n.placeholderNetNS).Info("placeholder network namespace deleted") + n.placeholderNetNS = "" + } + } + return nil } diff --git a/src/runtime/virtcontainers/network_linux_test.go b/src/runtime/virtcontainers/network_linux_test.go index 4ab36e04e4..3e93c2356c 100644 --- a/src/runtime/virtcontainers/network_linux_test.go +++ b/src/runtime/virtcontainers/network_linux_test.go @@ -363,11 +363,11 @@ func TestConvertDanDeviceToNetworkInfo(t *testing.T) { func TestAddEndpoints_Dan(t *testing.T) { network := &LinuxNetwork{ - "net-123", - []Endpoint{}, - NetXConnectDefaultModel, - true, - "testdata/dan-config.json", + netNSPath: "net-123", + eps: []Endpoint{}, + interworkingModel: NetXConnectDefaultModel, + netNSCreated: true, + danConfigPath: "testdata/dan-config.json", } ctx := context.TODO() diff --git a/src/runtime/virtcontainers/pkg/vcmock/sandbox.go b/src/runtime/virtcontainers/pkg/vcmock/sandbox.go index 09ece4e765..9657671099 100644 --- a/src/runtime/virtcontainers/pkg/vcmock/sandbox.go +++ b/src/runtime/virtcontainers/pkg/vcmock/sandbox.go @@ -255,6 +255,10 @@ func (s *Sandbox) GetHypervisorPid() (int, error) { return 0, nil } +func (s *Sandbox) RescanNetwork(ctx context.Context) error { + return nil +} + func (s *Sandbox) GuestVolumeStats(ctx context.Context, path string) ([]byte, error) { return nil, nil } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index c2f51f7cba..2a878538d2 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -20,6 +20,7 @@ import ( "strings" "sync" "syscall" + "time" v1 "github.com/containerd/cgroups/stats/v1" v2 "github.com/containerd/cgroups/v2/stats" @@ -330,6 +331,81 @@ func (s *Sandbox) GetHypervisorPid() (int, error) { return pids[0], nil } +// RescanNetwork re-scans the network namespace for endpoints if none have +// been discovered yet. This is idempotent: if endpoints already exist it +// returns immediately. It enables Docker 26+ support where networking is +// configured after task creation but before Start. +// +// Docker 26+ configures networking (veth pair, IP addresses) between +// Create and Start. The interfaces may not be present immediately, so +// this method polls until they appear or a timeout is reached. +// +// When new endpoints are found, the guest agent is informed about the +// interfaces and routes so that networking becomes functional inside the VM. +func (s *Sandbox) RescanNetwork(ctx context.Context) error { + if s.config.NetworkConfig.DisableNewNetwork { + return nil + } + if len(s.network.Endpoints()) > 0 { + return nil + } + + const maxWait = 5 * time.Second + const pollInterval = 50 * time.Millisecond + deadline := time.NewTimer(maxWait) + defer deadline.Stop() + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + s.Logger().Debug("waiting for network interfaces in namespace") + + for { + if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil { + return err + } + if len(s.network.Endpoints()) > 0 { + return s.configureGuestNetwork(ctx) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadline.C: + s.Logger().Warn("no network interfaces found after timeout — networking may be configured by prestart hooks") + return nil + case <-ticker.C: + } + } +} + +// configureGuestNetwork informs the guest agent about discovered network +// endpoints so that interfaces and routes become functional inside the VM. +func (s *Sandbox) configureGuestNetwork(ctx context.Context) error { + endpoints := s.network.Endpoints() + s.Logger().WithField("endpoints", len(endpoints)).Info("configuring hotplugged network in guest") + + // Note: ARP neighbors (3rd return value) are not propagated here + // because the agent interface only exposes per-entry updates. The + // full setupNetworks path in kataAgent handles them; this path is + // only reached for late-discovered endpoints where neighbor entries + // are populated dynamically by the kernel. + interfaces, routes, _, err := generateVCNetworkStructures(ctx, endpoints) + if err != nil { + return fmt.Errorf("generating network structures: %w", err) + } + for _, ifc := range interfaces { + if _, err := s.agent.updateInterface(ctx, ifc); err != nil { + return fmt.Errorf("updating interface %s in guest: %w", ifc.Name, err) + } + } + if len(routes) > 0 { + if _, err := s.agent.updateRoutes(ctx, routes); err != nil { + return fmt.Errorf("updating routes in guest: %w", err) + } + } + return nil +} + // GetAllContainers returns all containers. func (s *Sandbox) GetAllContainers() []VCContainer { ifa := make([]VCContainer, len(s.containers)) diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index dfe115bef9..39bcfde8f4 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -12,6 +12,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "strings" "syscall" "time" @@ -493,17 +494,38 @@ func RevertBytes(num uint64) uint64 { return 1024*RevertBytes(a) + b } +// dockerLibnetworkSetkey is the hook argument that identifies Docker's +// network configuration hook. The argument following it is the sandbox ID. +const dockerLibnetworkSetkey = "libnetwork-setkey" + +// dockerNetnsPrefixes are the well-known filesystem paths where the Docker +// daemon bind-mounts container network namespaces. +var dockerNetnsPrefixes = []string{"/var/run/docker/netns/", "/run/docker/netns/"} + +// validSandboxID matches Docker sandbox IDs: exactly 64 lowercase hex characters. +var validSandboxID = regexp.MustCompile(`^[0-9a-f]{64}$`) + // IsDockerContainer returns if the container is managed by docker -// This is done by checking the prestart hook for `libnetwork` arguments. +// This is done by checking the prestart and createRuntime hooks for +// `libnetwork` arguments. Docker 26+ may use CreateRuntime hooks +// instead of the deprecated Prestart hooks. func IsDockerContainer(spec *specs.Spec) bool { if spec == nil || spec.Hooks == nil { return false } - for _, hook := range spec.Hooks.Prestart { //nolint:all - for _, arg := range hook.Args { - if strings.HasPrefix(arg, "libnetwork") { - return true + // Check both Prestart (Docker < 26) and CreateRuntime (Docker >= 26) hooks. + hookSets := [][]specs.Hook{ + spec.Hooks.Prestart, //nolint:all + spec.Hooks.CreateRuntime, + } + + for _, hooks := range hookSets { + for _, hook := range hooks { + for _, arg := range hook.Args { + if strings.HasPrefix(arg, "libnetwork") { + return true + } } } } @@ -511,6 +533,50 @@ func IsDockerContainer(spec *specs.Spec) bool { return false } +// DockerNetnsPath attempts to discover Docker's pre-created network namespace +// path from OCI spec hooks. Docker's libnetwork-setkey hook contains the +// sandbox ID as its second argument, which maps to the netns file under +// /var/run/docker/netns/. +func DockerNetnsPath(spec *specs.Spec) string { + if spec == nil || spec.Hooks == nil { + return "" + } + + // Search both Prestart and CreateRuntime hooks for libnetwork-setkey. + hookSets := [][]specs.Hook{ + spec.Hooks.Prestart, //nolint:all + spec.Hooks.CreateRuntime, + } + + for _, hooks := range hookSets { + for _, hook := range hooks { + for i, arg := range hook.Args { + if arg == dockerLibnetworkSetkey && i+1 < len(hook.Args) { + sandboxID := hook.Args[i+1] + // Docker sandbox IDs are exactly 64 lowercase hex + // characters. Reject anything else to prevent path + // traversal and unexpected input. + if !validSandboxID.MatchString(sandboxID) { + continue + } + // Docker stores netns under well-known paths. + // Use Lstat to reject symlinks (which could point + // outside the Docker netns directory) and non-regular + // files such as directories. + for _, prefix := range dockerNetnsPrefixes { + nsPath := prefix + sandboxID + if fi, err := os.Lstat(nsPath); err == nil && fi.Mode().IsRegular() { + return nsPath + } + } + } + } + } + } + + return "" +} + // GetGuestNUMANodes constructs guest NUMA nodes mapping to host NUMA nodes and host CPUs. func GetGuestNUMANodes(numaMapping []string) ([]types.GuestNUMANode, error) { // Add guest NUMA node for each specified subsets of host NUMA nodes. diff --git a/src/runtime/virtcontainers/utils/utils_test.go b/src/runtime/virtcontainers/utils/utils_test.go index 0017072af8..8361caa1ee 100644 --- a/src/runtime/virtcontainers/utils/utils_test.go +++ b/src/runtime/virtcontainers/utils/utils_test.go @@ -579,24 +579,178 @@ func TestRevertBytes(t *testing.T) { assert.Equal(expectedNum, num) } +// TestIsDockerContainer validates hook-detection logic in isolation. +// End-to-end Docker→containerd→kata integration is covered by +// external tests (see tests/integration/kubernetes/). func TestIsDockerContainer(t *testing.T) { assert := assert.New(t) + // nil spec + assert.False(IsDockerContainer(nil)) + + // nil hooks + assert.False(IsDockerContainer(&specs.Spec{})) + + // Unrelated prestart hook ociSpec := &specs.Spec{ Hooks: &specs.Hooks{ - Prestart: []specs.Hook{ - { - Args: []string{ - "haha", - }, - }, + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"haha"}}, }, }, } assert.False(IsDockerContainer(ociSpec)) + // Prestart hook with libnetwork (Docker < 26) ociSpec.Hooks.Prestart = append(ociSpec.Hooks.Prestart, specs.Hook{ //nolint:all Args: []string{"libnetwork-xxx"}, }) assert.True(IsDockerContainer(ociSpec)) + + // CreateRuntime hook with libnetwork (Docker >= 26) + ociSpec2 := &specs.Spec{ + Hooks: &specs.Hooks{ + CreateRuntime: []specs.Hook{ + {Args: []string{"/usr/bin/docker-proxy", "libnetwork-setkey", "abc123", "ctrl"}}, + }, + }, + } + assert.True(IsDockerContainer(ociSpec2)) + + // CreateRuntime hook without libnetwork + ociSpec3 := &specs.Spec{ + Hooks: &specs.Hooks{ + CreateRuntime: []specs.Hook{ + {Args: []string{"/some/other/hook"}}, + }, + }, + } + assert.False(IsDockerContainer(ociSpec3)) +} + +// TestDockerNetnsPath validates netns path discovery from OCI hook args. +// This does not test the actual namespace opening or endpoint scanning; +// see integration tests for full-path coverage. +func TestDockerNetnsPath(t *testing.T) { + assert := assert.New(t) + + // Valid 64-char hex sandbox IDs for test cases. + validID := strings.Repeat("ab", 32) // 64 hex chars + validID2 := strings.Repeat("cd", 32) // another 64 hex chars + invalidShortID := "abc123" // too short + invalidUpperID := strings.Repeat("AB", 32) // uppercase rejected + + // nil spec + assert.Equal("", DockerNetnsPath(nil)) + + // nil hooks + assert.Equal("", DockerNetnsPath(&specs.Spec{})) + + // Hook without libnetwork-setkey + spec := &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"/some/binary", "unrelated"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // Prestart hook with libnetwork-setkey but sandbox ID too short (rejected by regex) + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"/usr/bin/proxy", "libnetwork-setkey", invalidShortID, "ctrl"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // Prestart hook with libnetwork-setkey but uppercase hex (rejected by regex) + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"/usr/bin/proxy", "libnetwork-setkey", invalidUpperID, "ctrl"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // Prestart hook with valid sandbox ID but netns file doesn't exist on disk + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID, "ctrl"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // Prestart hook with libnetwork-setkey and existing netns file — success path + tmpDir := t.TempDir() + fakeNsDir := filepath.Join(tmpDir, "netns") + err := os.MkdirAll(fakeNsDir, 0755) + assert.NoError(err) + fakeNsFile := filepath.Join(fakeNsDir, validID) + err = os.WriteFile(fakeNsFile, []byte{}, 0644) + assert.NoError(err) + + // Temporarily override dockerNetnsPrefixes so DockerNetnsPath can find + // the netns file we created under the temp directory. + origPrefixes := dockerNetnsPrefixes + dockerNetnsPrefixes = []string{fakeNsDir + "/"} + defer func() { dockerNetnsPrefixes = origPrefixes }() + + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID, "ctrl"}}, + }, + }, + } + assert.Equal(fakeNsFile, DockerNetnsPath(spec)) + + // Sandbox ID that is a directory rather than a regular file — must be rejected + dirID := validID2 + err = os.MkdirAll(filepath.Join(fakeNsDir, dirID), 0755) + assert.NoError(err) + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"/usr/bin/proxy", "libnetwork-setkey", dirID, "ctrl"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // CreateRuntime hook with valid sandbox ID — file doesn't exist + validID3 := strings.Repeat("ef", 32) + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + CreateRuntime: []specs.Hook{ + {Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID3, "ctrl"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // Hook with libnetwork-setkey as last arg (no sandbox ID follows) — no panic + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{"libnetwork-setkey"}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) + + // Empty args slice + spec = &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ //nolint:all + {Args: []string{}}, + }, + }, + } + assert.Equal("", DockerNetnsPath(spec)) } diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh new file mode 100755 index 0000000000..6860cd108a --- /dev/null +++ b/tests/integration/docker/gha-run.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +docker_dir="$(dirname "$(readlink -f "$0")")" +source "${docker_dir}/../../common.bash" +image="${image:-instrumentisto/nmap:latest}" + +function install_dependencies() { + info "Installing the dependencies needed for running the docker smoke test" + + sudo -E docker pull "${image}" +} + +function run() { + info "Running docker smoke test tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + + info "Running docker with runc" + sudo docker run --rm --entrypoint nping "${image}" --tcp-connect -c 2 -p 80 www.github.com + + info "Running docker with Kata Containers (${KATA_HYPERVISOR})" + sudo docker run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping "${image}" --tcp-connect -c 2 -p 80 www.github.com +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@"