mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-04 11:03:52 +00:00
runtime: fix Docker 26+ networking by rescanning after Start
Docker 26+ configures container networking (veth pair, IP addresses, routes) after task creation rather than before. Kata's endpoint scan runs during CreateSandbox, before the interfaces exist, resulting in VMs starting without network connectivity (no -netdev passed to QEMU). Add RescanNetwork() which runs asynchronously after the Start RPC. It polls the network namespace until Docker's interfaces appear, then hotplugs them to QEMU and informs the guest agent to configure them inside the VM. Additional fixes: - mountinfo parser: find fs type dynamically instead of hardcoded field index, fixing parsing with optional mount tags (shared:, master:) - IsDockerContainer: check CreateRuntime hooks for Docker 26+ - DockerNetnsPath: extract netns path from libnetwork-setkey hook args with path traversal protection - detectHypervisorNetns: verify PID ownership via /proc/pid/cmdline to guard against PID recycling - startVM guard: rescan when len(endpoints)==0 after VM start Fixes: #9340 Signed-off-by: llink5 <llink5@users.noreply.github.com>
This commit is contained in:
44
.github/workflows/basic-ci-amd64.yaml
vendored
44
.github/workflows/basic-ci-amd64.yaml
vendored
@@ -269,6 +269,50 @@ jobs:
|
||||
timeout-minutes: 15
|
||||
run: bash tests/functional/vfio/gha-run.sh run
|
||||
|
||||
run-docker-tests:
|
||||
name: run-docker-tests
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
# all due to a single flaky instance.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run docker smoke test
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/docker/gha-run.sh run
|
||||
|
||||
run-nerdctl-tests:
|
||||
name: run-nerdctl-tests
|
||||
strategy:
|
||||
|
||||
41
.github/workflows/basic-ci-s390x.yaml
vendored
41
.github/workflows/basic-ci-s390x.yaml
vendored
@@ -123,3 +123,44 @@ jobs:
|
||||
- name: Run containerd-stability tests
|
||||
timeout-minutes: 15
|
||||
run: bash tests/stability/gha-run.sh run
|
||||
|
||||
run-docker-tests:
|
||||
name: run-docker-tests
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
# all due to a single flaky instance.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm: ['qemu']
|
||||
runs-on: s390x-large
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-s390x${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run docker smoke test
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/docker/gha-run.sh run
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/containerd/containerd/api/types/task"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/containerd/containerd/api/types/task"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||
)
|
||||
|
||||
@@ -46,6 +46,19 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error
|
||||
}
|
||||
go watchSandbox(ctx, s)
|
||||
|
||||
// If no network endpoints were discovered during sandbox creation,
|
||||
// schedule an async rescan. This handles runtimes that configure
|
||||
// networking after task creation (e.g. Docker 26+ configures
|
||||
// networking after the Start response, and prestart hooks may
|
||||
// not have run yet on slower architectures).
|
||||
// RescanNetwork is idempotent — it returns immediately if
|
||||
// endpoints already exist.
|
||||
go func() {
|
||||
if err := s.sandbox.RescanNetwork(s.ctx); err != nil {
|
||||
shimLog.WithError(err).Error("async network rescan failed — container may lack networking")
|
||||
}
|
||||
}()
|
||||
|
||||
// We use s.ctx(`ctx` derived from `s.ctx`) to check for cancellation of the
|
||||
// shim context and the context passed to startContainer for tracing.
|
||||
go watchOOMEvents(ctx, s)
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||
vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
|
||||
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
@@ -140,6 +141,17 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo
|
||||
sandboxConfig.Containers[0].RootFs = rootFs
|
||||
}
|
||||
|
||||
// Docker 26+ may set up networking before task creation instead of using
|
||||
// prestart hooks. The netns path is not in the OCI spec but can be
|
||||
// discovered from Docker's libnetwork hook args which contain the sandbox
|
||||
// ID that maps to /var/run/docker/netns/<sandbox_id>.
|
||||
if sandboxConfig.NetworkConfig.NetworkID == "" && !sandboxConfig.NetworkConfig.DisableNewNetwork {
|
||||
if dockerNetns := utils.DockerNetnsPath(&ociSpec); dockerNetns != "" {
|
||||
sandboxConfig.NetworkConfig.NetworkID = dockerNetns
|
||||
kataUtilsLogger.WithField("netns", dockerNetns).Info("discovered Docker network namespace from hook args")
|
||||
}
|
||||
}
|
||||
|
||||
// Important to create the network namespace before the sandbox is
|
||||
// created, because it is not responsible for the creation of the
|
||||
// netns if it does not exist.
|
||||
|
||||
@@ -20,7 +20,9 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const procMountInfoFile = "/proc/self/mountinfo"
|
||||
const (
|
||||
procMountInfoFile = "/proc/self/mountinfo"
|
||||
)
|
||||
|
||||
// EnterNetNS is free from any call to a go routine, and it calls
|
||||
// into runtime.LockOSThread(), meaning it won't be executed in a
|
||||
@@ -29,27 +31,30 @@ func EnterNetNS(networkID string, cb func() error) error {
|
||||
return vc.EnterNetNS(networkID, cb)
|
||||
}
|
||||
|
||||
// SetupNetworkNamespace create a network namespace
|
||||
// SetupNetworkNamespace creates a network namespace if one is not already
|
||||
// provided via NetworkID. When NetworkID is empty and networking is not
|
||||
// disabled, a new namespace is created as a placeholder; the actual
|
||||
// hypervisor namespace will be discovered later by addAllEndpoints after
|
||||
// the VM has started.
|
||||
func SetupNetworkNamespace(config *vc.NetworkConfig) error {
|
||||
if config.DisableNewNetwork {
|
||||
kataUtilsLogger.Info("DisableNewNetNs is on, shim and hypervisor are running in the host netns")
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
var n ns.NetNS
|
||||
|
||||
if config.NetworkID == "" {
|
||||
var (
|
||||
err error
|
||||
n ns.NetNS
|
||||
)
|
||||
|
||||
if rootless.IsRootless() {
|
||||
n, err = rootless.NewNS()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
n, err = testutils.NewNS()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
config.NetworkID = n.Path()
|
||||
@@ -71,11 +76,23 @@ func SetupNetworkNamespace(config *vc.NetworkConfig) error {
|
||||
}
|
||||
|
||||
const (
|
||||
netNsMountType = "nsfs"
|
||||
mountTypeFieldIdx = 8
|
||||
mountDestIdx = 4
|
||||
netNsMountType = "nsfs"
|
||||
mountDestIdx = 4
|
||||
)
|
||||
|
||||
// mountinfoFsType finds the filesystem type in a parsed mountinfo line.
|
||||
// The mountinfo format has optional tagged fields (shared:, master:, etc.)
|
||||
// between field 7 and a "-" separator. The fs type is the field immediately
|
||||
// after "-". Returns "" if the separator is not found.
|
||||
func mountinfoFsType(fields []string) string {
|
||||
for i, f := range fields {
|
||||
if f == "-" && i+1 < len(fields) {
|
||||
return fields[i+1]
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// getNetNsFromBindMount returns the network namespace for the bind-mounted path
|
||||
func getNetNsFromBindMount(nsPath string, procMountFile string) (string, error) {
|
||||
// Resolve all symlinks in the path as the mountinfo file contains
|
||||
@@ -100,16 +117,15 @@ func getNetNsFromBindMount(nsPath string, procMountFile string) (string, error)
|
||||
// "711 26 0:3 net:[4026532009] /run/docker/netns/default rw shared:535 - nsfs nsfs rw"
|
||||
//
|
||||
// Reference: https://www.kernel.org/doc/Documentation/filesystems/proc.txt
|
||||
|
||||
// We are interested in the first 9 fields of this file,
|
||||
// to check for the correct mount type.
|
||||
// The "-" separator has a variable position due to optional tagged
|
||||
// fields, so we locate the fs type dynamically.
|
||||
fields := strings.Split(text, " ")
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
|
||||
// We check here if the mount type is a network namespace mount type, namely "nsfs"
|
||||
if fields[mountTypeFieldIdx] != netNsMountType {
|
||||
if mountinfoFsType(fields) != netNsMountType {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -149,3 +149,23 @@ func TestSetupNetworkNamespace(t *testing.T) {
|
||||
err = SetupNetworkNamespace(config)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
func TestMountinfoFsType(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Standard mountinfo line with optional tagged fields
|
||||
fields := []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw", "shared:535", "-", "nsfs", "nsfs", "rw"}
|
||||
assert.Equal("nsfs", mountinfoFsType(fields))
|
||||
|
||||
// Multiple optional tags before separator
|
||||
fields = []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw", "shared:535", "master:1", "-", "nsfs", "nsfs", "rw"}
|
||||
assert.Equal("nsfs", mountinfoFsType(fields))
|
||||
|
||||
// No separator
|
||||
fields = []string{"711", "26", "0:3", "net:[4026532009]", "/run/docker/netns/default", "rw"}
|
||||
assert.Equal("", mountinfoFsType(fields))
|
||||
|
||||
// Separator at end (malformed)
|
||||
fields = []string{"711", "26", "-"}
|
||||
assert.Equal("", mountinfoFsType(fields))
|
||||
}
|
||||
|
||||
@@ -72,6 +72,8 @@ type VCSandbox interface {
|
||||
|
||||
GetOOMEvent(ctx context.Context) (string, error)
|
||||
GetHypervisorPid() (int, error)
|
||||
// RescanNetwork re-scans the network namespace for late-discovered endpoints.
|
||||
RescanNetwork(ctx context.Context) error
|
||||
|
||||
UpdateRuntimeMetrics() error
|
||||
GetAgentMetrics(ctx context.Context) (string, error)
|
||||
|
||||
@@ -17,9 +17,11 @@ import (
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containernetworking/plugins/pkg/ns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
"github.com/vishvananda/netns"
|
||||
otelTrace "go.opentelemetry.io/otel/trace"
|
||||
@@ -45,6 +47,11 @@ type LinuxNetwork struct {
|
||||
interworkingModel NetInterworkingModel
|
||||
netNSCreated bool
|
||||
danConfigPath string
|
||||
// placeholderNetNS holds the path to a placeholder network namespace
|
||||
// that we created but later abandoned in favour of the hypervisor's
|
||||
// netns. If best-effort deletion in addAllEndpoints fails, teardown
|
||||
// retries the cleanup via RemoveEndpoints.
|
||||
placeholderNetNS string
|
||||
}
|
||||
|
||||
// NewNetwork creates a new Linux Network from a NetworkConfig.
|
||||
@@ -68,11 +75,11 @@ func NewNetwork(configs ...*NetworkConfig) (Network, error) {
|
||||
}
|
||||
|
||||
return &LinuxNetwork{
|
||||
config.NetworkID,
|
||||
[]Endpoint{},
|
||||
config.InterworkingModel,
|
||||
config.NetworkCreated,
|
||||
config.DanConfigPath,
|
||||
netNSPath: config.NetworkID,
|
||||
eps: []Endpoint{},
|
||||
interworkingModel: config.InterworkingModel,
|
||||
netNSCreated: config.NetworkCreated,
|
||||
danConfigPath: config.DanConfigPath,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -325,28 +332,91 @@ func (n *LinuxNetwork) GetEndpointsNum() (int, error) {
|
||||
// Scan the networking namespace through netlink and then:
|
||||
// 1. Create the endpoints for the relevant interfaces found there.
|
||||
// 2. Attach them to the VM.
|
||||
//
|
||||
// If no usable interfaces are found and the hypervisor is running in a
|
||||
// different network namespace (e.g. Docker 26+ places QEMU in its own
|
||||
// pre-configured namespace), switch to the hypervisor's namespace and
|
||||
// rescan there. This handles the case where the OCI spec does not
|
||||
// communicate the network namespace path.
|
||||
func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug bool) error {
|
||||
netnsHandle, err := netns.GetFromPath(n.netNSPath)
|
||||
endpoints, err := n.scanEndpointsInNs(ctx, s, n.netNSPath, hotplug)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If the scan found no usable endpoints, check whether the
|
||||
// hypervisor is running in a different namespace and retry there.
|
||||
if len(endpoints) == 0 && s != nil {
|
||||
if hypervisorNs, ok := n.detectHypervisorNetns(s); ok {
|
||||
networkLogger().WithFields(logrus.Fields{
|
||||
"original_netns": n.netNSPath,
|
||||
"hypervisor_netns": hypervisorNs,
|
||||
}).Debug("no endpoints in original netns, switching to hypervisor netns")
|
||||
|
||||
origPath := n.netNSPath
|
||||
origCreated := n.netNSCreated
|
||||
n.netNSPath = hypervisorNs
|
||||
|
||||
_, err = n.scanEndpointsInNs(ctx, s, n.netNSPath, hotplug)
|
||||
if err != nil {
|
||||
n.netNSPath = origPath
|
||||
n.netNSCreated = origCreated
|
||||
return err
|
||||
}
|
||||
|
||||
// Clean up the placeholder namespace we created — we're now
|
||||
// using the hypervisor's namespace and the placeholder is empty.
|
||||
// Only clear netNSCreated once deletion succeeds; on failure,
|
||||
// stash the path so RemoveEndpoints can retry during teardown.
|
||||
if origCreated {
|
||||
if delErr := deleteNetNS(origPath); delErr != nil {
|
||||
networkLogger().WithField("netns", origPath).WithError(delErr).Warn("failed to delete placeholder netns, will retry during teardown")
|
||||
n.placeholderNetNS = origPath
|
||||
}
|
||||
}
|
||||
// The hypervisor's namespace was not created by us.
|
||||
n.netNSCreated = false
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(n.eps, func(i, j int) bool {
|
||||
return n.eps[i].Name() < n.eps[j].Name()
|
||||
})
|
||||
|
||||
networkLogger().WithField("endpoints", n.eps).Info("endpoints found after scan")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// scanEndpointsInNs scans a network namespace for usable (non-loopback,
|
||||
// configured) interfaces and adds them as endpoints. Returns the list of
|
||||
// newly added endpoints.
|
||||
func (n *LinuxNetwork) scanEndpointsInNs(ctx context.Context, s *Sandbox, nsPath string, hotplug bool) ([]Endpoint, error) {
|
||||
netnsHandle, err := netns.GetFromPath(nsPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer netnsHandle.Close()
|
||||
|
||||
netlinkHandle, err := netlink.NewHandleAt(netnsHandle)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
defer netlinkHandle.Close()
|
||||
|
||||
linkList, err := netlinkHandle.LinkList()
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
epsBefore := len(n.eps)
|
||||
var added []Endpoint
|
||||
|
||||
for _, link := range linkList {
|
||||
netInfo, err := networkInfoFromLink(netlinkHandle, link)
|
||||
if err != nil {
|
||||
return err
|
||||
// No rollback needed — no endpoints were added in this iteration yet.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Ignore unconfigured network interfaces. These are
|
||||
@@ -368,22 +438,62 @@ func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug
|
||||
continue
|
||||
}
|
||||
|
||||
if err := doNetNS(n.netNSPath, func(_ ns.NetNS) error {
|
||||
_, err = n.addSingleEndpoint(ctx, s, netInfo, hotplug)
|
||||
return err
|
||||
if err := doNetNS(nsPath, func(_ ns.NetNS) error {
|
||||
ep, addErr := n.addSingleEndpoint(ctx, s, netInfo, hotplug)
|
||||
if addErr == nil {
|
||||
added = append(added, ep)
|
||||
}
|
||||
return addErr
|
||||
}); err != nil {
|
||||
return err
|
||||
// Rollback: remove any endpoints added during this scan
|
||||
// so that a failed scan does not leave partial side effects.
|
||||
n.eps = n.eps[:epsBefore]
|
||||
return nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sort.Slice(n.eps, func(i, j int) bool {
|
||||
return n.eps[i].Name() < n.eps[j].Name()
|
||||
})
|
||||
return added, nil
|
||||
}
|
||||
|
||||
networkLogger().WithField("endpoints", n.eps).Info("endpoints found after scan")
|
||||
// detectHypervisorNetns checks whether the hypervisor process is running in a
|
||||
// network namespace different from the one we are currently tracking. If so it
|
||||
// returns the procfs path to the hypervisor's netns and true.
|
||||
func (n *LinuxNetwork) detectHypervisorNetns(s *Sandbox) (string, bool) {
|
||||
pid, err := s.GetHypervisorPid()
|
||||
if err != nil || pid <= 0 {
|
||||
return "", false
|
||||
}
|
||||
|
||||
return nil
|
||||
// Guard against PID recycling: verify the process belongs to this
|
||||
// sandbox by checking its command line for the sandbox ID. QEMU is
|
||||
// started with -name sandbox-<id>, so the ID will appear in cmdline.
|
||||
// /proc/pid/cmdline uses null bytes as argument separators; replace
|
||||
// them so the substring search works on the joined argument string.
|
||||
cmdlineRaw, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
cmdline := strings.ReplaceAll(string(cmdlineRaw), "\x00", " ")
|
||||
if !strings.Contains(cmdline, s.id) {
|
||||
return "", false
|
||||
}
|
||||
|
||||
hypervisorNs := fmt.Sprintf("/proc/%d/ns/net", pid)
|
||||
|
||||
// Compare device and inode numbers. Inode numbers are only unique
|
||||
// within a device, so both must match to confirm the same namespace.
|
||||
var currentStat, hvStat unix.Stat_t
|
||||
if err := unix.Stat(n.netNSPath, ¤tStat); err != nil {
|
||||
return "", false
|
||||
}
|
||||
if err := unix.Stat(hypervisorNs, &hvStat); err != nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
if currentStat.Dev != hvStat.Dev || currentStat.Ino != hvStat.Ino {
|
||||
return hypervisorNs, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func convertDanDeviceToNetworkInfo(device *vctypes.DanDevice) (*NetworkInfo, error) {
|
||||
@@ -571,6 +681,17 @@ func (n *LinuxNetwork) RemoveEndpoints(ctx context.Context, s *Sandbox, endpoint
|
||||
return deleteNetNS(n.netNSPath)
|
||||
}
|
||||
|
||||
// Retry cleanup of a placeholder namespace whose earlier deletion
|
||||
// failed in addAllEndpoints.
|
||||
if n.placeholderNetNS != "" && endpoints == nil {
|
||||
if delErr := deleteNetNS(n.placeholderNetNS); delErr != nil {
|
||||
networkLogger().WithField("netns", n.placeholderNetNS).WithError(delErr).Warn("failed to delete placeholder netns during teardown")
|
||||
} else {
|
||||
networkLogger().WithField("netns", n.placeholderNetNS).Info("placeholder network namespace deleted")
|
||||
n.placeholderNetNS = ""
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -363,11 +363,11 @@ func TestConvertDanDeviceToNetworkInfo(t *testing.T) {
|
||||
func TestAddEndpoints_Dan(t *testing.T) {
|
||||
|
||||
network := &LinuxNetwork{
|
||||
"net-123",
|
||||
[]Endpoint{},
|
||||
NetXConnectDefaultModel,
|
||||
true,
|
||||
"testdata/dan-config.json",
|
||||
netNSPath: "net-123",
|
||||
eps: []Endpoint{},
|
||||
interworkingModel: NetXConnectDefaultModel,
|
||||
netNSCreated: true,
|
||||
danConfigPath: "testdata/dan-config.json",
|
||||
}
|
||||
|
||||
ctx := context.TODO()
|
||||
|
||||
@@ -255,6 +255,10 @@ func (s *Sandbox) GetHypervisorPid() (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (s *Sandbox) RescanNetwork(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Sandbox) GuestVolumeStats(ctx context.Context, path string) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
v1 "github.com/containerd/cgroups/stats/v1"
|
||||
v2 "github.com/containerd/cgroups/v2/stats"
|
||||
@@ -330,6 +331,81 @@ func (s *Sandbox) GetHypervisorPid() (int, error) {
|
||||
return pids[0], nil
|
||||
}
|
||||
|
||||
// RescanNetwork re-scans the network namespace for endpoints if none have
|
||||
// been discovered yet. This is idempotent: if endpoints already exist it
|
||||
// returns immediately. It enables Docker 26+ support where networking is
|
||||
// configured after task creation but before Start.
|
||||
//
|
||||
// Docker 26+ configures networking (veth pair, IP addresses) between
|
||||
// Create and Start. The interfaces may not be present immediately, so
|
||||
// this method polls until they appear or a timeout is reached.
|
||||
//
|
||||
// When new endpoints are found, the guest agent is informed about the
|
||||
// interfaces and routes so that networking becomes functional inside the VM.
|
||||
func (s *Sandbox) RescanNetwork(ctx context.Context) error {
|
||||
if s.config.NetworkConfig.DisableNewNetwork {
|
||||
return nil
|
||||
}
|
||||
if len(s.network.Endpoints()) > 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
const maxWait = 5 * time.Second
|
||||
const pollInterval = 50 * time.Millisecond
|
||||
deadline := time.NewTimer(maxWait)
|
||||
defer deadline.Stop()
|
||||
ticker := time.NewTicker(pollInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
s.Logger().Debug("waiting for network interfaces in namespace")
|
||||
|
||||
for {
|
||||
if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(s.network.Endpoints()) > 0 {
|
||||
return s.configureGuestNetwork(ctx)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-deadline.C:
|
||||
s.Logger().Warn("no network interfaces found after timeout — networking may be configured by prestart hooks")
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// configureGuestNetwork informs the guest agent about discovered network
|
||||
// endpoints so that interfaces and routes become functional inside the VM.
|
||||
func (s *Sandbox) configureGuestNetwork(ctx context.Context) error {
|
||||
endpoints := s.network.Endpoints()
|
||||
s.Logger().WithField("endpoints", len(endpoints)).Info("configuring hotplugged network in guest")
|
||||
|
||||
// Note: ARP neighbors (3rd return value) are not propagated here
|
||||
// because the agent interface only exposes per-entry updates. The
|
||||
// full setupNetworks path in kataAgent handles them; this path is
|
||||
// only reached for late-discovered endpoints where neighbor entries
|
||||
// are populated dynamically by the kernel.
|
||||
interfaces, routes, _, err := generateVCNetworkStructures(ctx, endpoints)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating network structures: %w", err)
|
||||
}
|
||||
for _, ifc := range interfaces {
|
||||
if _, err := s.agent.updateInterface(ctx, ifc); err != nil {
|
||||
return fmt.Errorf("updating interface %s in guest: %w", ifc.Name, err)
|
||||
}
|
||||
}
|
||||
if len(routes) > 0 {
|
||||
if _, err := s.agent.updateRoutes(ctx, routes); err != nil {
|
||||
return fmt.Errorf("updating routes in guest: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllContainers returns all containers.
|
||||
func (s *Sandbox) GetAllContainers() []VCContainer {
|
||||
ifa := make([]VCContainer, len(s.containers))
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -493,17 +494,38 @@ func RevertBytes(num uint64) uint64 {
|
||||
return 1024*RevertBytes(a) + b
|
||||
}
|
||||
|
||||
// dockerLibnetworkSetkey is the hook argument that identifies Docker's
|
||||
// network configuration hook. The argument following it is the sandbox ID.
|
||||
const dockerLibnetworkSetkey = "libnetwork-setkey"
|
||||
|
||||
// dockerNetnsPrefixes are the well-known filesystem paths where the Docker
|
||||
// daemon bind-mounts container network namespaces.
|
||||
var dockerNetnsPrefixes = []string{"/var/run/docker/netns/", "/run/docker/netns/"}
|
||||
|
||||
// validSandboxID matches Docker sandbox IDs: exactly 64 lowercase hex characters.
|
||||
var validSandboxID = regexp.MustCompile(`^[0-9a-f]{64}$`)
|
||||
|
||||
// IsDockerContainer returns if the container is managed by docker
|
||||
// This is done by checking the prestart hook for `libnetwork` arguments.
|
||||
// This is done by checking the prestart and createRuntime hooks for
|
||||
// `libnetwork` arguments. Docker 26+ may use CreateRuntime hooks
|
||||
// instead of the deprecated Prestart hooks.
|
||||
func IsDockerContainer(spec *specs.Spec) bool {
|
||||
if spec == nil || spec.Hooks == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, hook := range spec.Hooks.Prestart { //nolint:all
|
||||
for _, arg := range hook.Args {
|
||||
if strings.HasPrefix(arg, "libnetwork") {
|
||||
return true
|
||||
// Check both Prestart (Docker < 26) and CreateRuntime (Docker >= 26) hooks.
|
||||
hookSets := [][]specs.Hook{
|
||||
spec.Hooks.Prestart, //nolint:all
|
||||
spec.Hooks.CreateRuntime,
|
||||
}
|
||||
|
||||
for _, hooks := range hookSets {
|
||||
for _, hook := range hooks {
|
||||
for _, arg := range hook.Args {
|
||||
if strings.HasPrefix(arg, "libnetwork") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -511,6 +533,50 @@ func IsDockerContainer(spec *specs.Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// DockerNetnsPath attempts to discover Docker's pre-created network namespace
|
||||
// path from OCI spec hooks. Docker's libnetwork-setkey hook contains the
|
||||
// sandbox ID as its second argument, which maps to the netns file under
|
||||
// /var/run/docker/netns/<sandbox_id>.
|
||||
func DockerNetnsPath(spec *specs.Spec) string {
|
||||
if spec == nil || spec.Hooks == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Search both Prestart and CreateRuntime hooks for libnetwork-setkey.
|
||||
hookSets := [][]specs.Hook{
|
||||
spec.Hooks.Prestart, //nolint:all
|
||||
spec.Hooks.CreateRuntime,
|
||||
}
|
||||
|
||||
for _, hooks := range hookSets {
|
||||
for _, hook := range hooks {
|
||||
for i, arg := range hook.Args {
|
||||
if arg == dockerLibnetworkSetkey && i+1 < len(hook.Args) {
|
||||
sandboxID := hook.Args[i+1]
|
||||
// Docker sandbox IDs are exactly 64 lowercase hex
|
||||
// characters. Reject anything else to prevent path
|
||||
// traversal and unexpected input.
|
||||
if !validSandboxID.MatchString(sandboxID) {
|
||||
continue
|
||||
}
|
||||
// Docker stores netns under well-known paths.
|
||||
// Use Lstat to reject symlinks (which could point
|
||||
// outside the Docker netns directory) and non-regular
|
||||
// files such as directories.
|
||||
for _, prefix := range dockerNetnsPrefixes {
|
||||
nsPath := prefix + sandboxID
|
||||
if fi, err := os.Lstat(nsPath); err == nil && fi.Mode().IsRegular() {
|
||||
return nsPath
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetGuestNUMANodes constructs guest NUMA nodes mapping to host NUMA nodes and host CPUs.
|
||||
func GetGuestNUMANodes(numaMapping []string) ([]types.GuestNUMANode, error) {
|
||||
// Add guest NUMA node for each specified subsets of host NUMA nodes.
|
||||
|
||||
@@ -579,24 +579,178 @@ func TestRevertBytes(t *testing.T) {
|
||||
assert.Equal(expectedNum, num)
|
||||
}
|
||||
|
||||
// TestIsDockerContainer validates hook-detection logic in isolation.
|
||||
// End-to-end Docker→containerd→kata integration is covered by
|
||||
// external tests (see tests/integration/kubernetes/).
|
||||
func TestIsDockerContainer(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// nil spec
|
||||
assert.False(IsDockerContainer(nil))
|
||||
|
||||
// nil hooks
|
||||
assert.False(IsDockerContainer(&specs.Spec{}))
|
||||
|
||||
// Unrelated prestart hook
|
||||
ociSpec := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Args: []string{
|
||||
"haha",
|
||||
},
|
||||
},
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"haha"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.False(IsDockerContainer(ociSpec))
|
||||
|
||||
// Prestart hook with libnetwork (Docker < 26)
|
||||
ociSpec.Hooks.Prestart = append(ociSpec.Hooks.Prestart, specs.Hook{ //nolint:all
|
||||
Args: []string{"libnetwork-xxx"},
|
||||
})
|
||||
assert.True(IsDockerContainer(ociSpec))
|
||||
|
||||
// CreateRuntime hook with libnetwork (Docker >= 26)
|
||||
ociSpec2 := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
CreateRuntime: []specs.Hook{
|
||||
{Args: []string{"/usr/bin/docker-proxy", "libnetwork-setkey", "abc123", "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.True(IsDockerContainer(ociSpec2))
|
||||
|
||||
// CreateRuntime hook without libnetwork
|
||||
ociSpec3 := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
CreateRuntime: []specs.Hook{
|
||||
{Args: []string{"/some/other/hook"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.False(IsDockerContainer(ociSpec3))
|
||||
}
|
||||
|
||||
// TestDockerNetnsPath validates netns path discovery from OCI hook args.
|
||||
// This does not test the actual namespace opening or endpoint scanning;
|
||||
// see integration tests for full-path coverage.
|
||||
func TestDockerNetnsPath(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Valid 64-char hex sandbox IDs for test cases.
|
||||
validID := strings.Repeat("ab", 32) // 64 hex chars
|
||||
validID2 := strings.Repeat("cd", 32) // another 64 hex chars
|
||||
invalidShortID := "abc123" // too short
|
||||
invalidUpperID := strings.Repeat("AB", 32) // uppercase rejected
|
||||
|
||||
// nil spec
|
||||
assert.Equal("", DockerNetnsPath(nil))
|
||||
|
||||
// nil hooks
|
||||
assert.Equal("", DockerNetnsPath(&specs.Spec{}))
|
||||
|
||||
// Hook without libnetwork-setkey
|
||||
spec := &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/some/binary", "unrelated"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with libnetwork-setkey but sandbox ID too short (rejected by regex)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", invalidShortID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with libnetwork-setkey but uppercase hex (rejected by regex)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", invalidUpperID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with valid sandbox ID but netns file doesn't exist on disk
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Prestart hook with libnetwork-setkey and existing netns file — success path
|
||||
tmpDir := t.TempDir()
|
||||
fakeNsDir := filepath.Join(tmpDir, "netns")
|
||||
err := os.MkdirAll(fakeNsDir, 0755)
|
||||
assert.NoError(err)
|
||||
fakeNsFile := filepath.Join(fakeNsDir, validID)
|
||||
err = os.WriteFile(fakeNsFile, []byte{}, 0644)
|
||||
assert.NoError(err)
|
||||
|
||||
// Temporarily override dockerNetnsPrefixes so DockerNetnsPath can find
|
||||
// the netns file we created under the temp directory.
|
||||
origPrefixes := dockerNetnsPrefixes
|
||||
dockerNetnsPrefixes = []string{fakeNsDir + "/"}
|
||||
defer func() { dockerNetnsPrefixes = origPrefixes }()
|
||||
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal(fakeNsFile, DockerNetnsPath(spec))
|
||||
|
||||
// Sandbox ID that is a directory rather than a regular file — must be rejected
|
||||
dirID := validID2
|
||||
err = os.MkdirAll(filepath.Join(fakeNsDir, dirID), 0755)
|
||||
assert.NoError(err)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", dirID, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// CreateRuntime hook with valid sandbox ID — file doesn't exist
|
||||
validID3 := strings.Repeat("ef", 32)
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
CreateRuntime: []specs.Hook{
|
||||
{Args: []string{"/usr/bin/proxy", "libnetwork-setkey", validID3, "ctrl"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Hook with libnetwork-setkey as last arg (no sandbox ID follows) — no panic
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{"libnetwork-setkey"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
|
||||
// Empty args slice
|
||||
spec = &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{ //nolint:all
|
||||
{Args: []string{}},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.Equal("", DockerNetnsPath(spec))
|
||||
}
|
||||
|
||||
45
tests/integration/docker/gha-run.sh
Executable file
45
tests/integration/docker/gha-run.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
kata_tarball_dir="${2:-kata-artifacts}"
|
||||
docker_dir="$(dirname "$(readlink -f "$0")")"
|
||||
source "${docker_dir}/../../common.bash"
|
||||
image="${image:-instrumentisto/nmap:latest}"
|
||||
|
||||
function install_dependencies() {
|
||||
info "Installing the dependencies needed for running the docker smoke test"
|
||||
|
||||
sudo -E docker pull "${image}"
|
||||
}
|
||||
|
||||
function run() {
|
||||
info "Running docker smoke test tests using ${KATA_HYPERVISOR} hypervisor"
|
||||
|
||||
enabling_hypervisor
|
||||
|
||||
info "Running docker with runc"
|
||||
sudo docker run --rm --entrypoint nping "${image}" --tcp-connect -c 2 -p 80 www.github.com
|
||||
|
||||
info "Running docker with Kata Containers (${KATA_HYPERVISOR})"
|
||||
sudo docker run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping "${image}" --tcp-connect -c 2 -p 80 www.github.com
|
||||
}
|
||||
|
||||
function main() {
|
||||
action="${1:-}"
|
||||
case "${action}" in
|
||||
install-dependencies) install_dependencies ;;
|
||||
install-kata) install_kata ;;
|
||||
run) run ;;
|
||||
*) >&2 die "Invalid argument" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user