From 32fd013716fa7b4818b2edd88d7b3a62e0e22be1 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 21 Aug 2023 09:26:42 +0000 Subject: [PATCH 1/3] runtime: run prestart hooks before starting VM for FC Add a new hypervisor capability to tell if it supports device hotplug. If not, we should run prestart hooks before starting new VMs as nerdctl is using the prestart hooks to set up netns. To make nerdctl + FC to work, we need to run the prestart hooks before starting new VMs. Fixes: #6384 Signed-off-by: Peng Tao --- src/runtime/virtcontainers/acrn_arch_base.go | 1 + .../virtcontainers/acrn_arch_base_test.go | 1 + src/runtime/virtcontainers/acrn_test.go | 1 + src/runtime/virtcontainers/clh.go | 1 + src/runtime/virtcontainers/clh_test.go | 3 ++ src/runtime/virtcontainers/qemu_amd64.go | 1 + src/runtime/virtcontainers/qemu_amd64_test.go | 2 + src/runtime/virtcontainers/qemu_arch_base.go | 1 + .../virtcontainers/qemu_arch_base_test.go | 1 + src/runtime/virtcontainers/qemu_ppc64le.go | 1 + src/runtime/virtcontainers/qemu_test.go | 1 + src/runtime/virtcontainers/sandbox.go | 50 ++++++++++++++----- .../virtcontainers/types/capabilities.go | 11 ++++ 13 files changed, 63 insertions(+), 12 deletions(-) diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 0b9ee53cc7..fa8ce0fe62 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ b/src/runtime/virtcontainers/acrn_arch_base.go @@ -366,6 +366,7 @@ func (a *acrnArchBase) capabilities(config HypervisorConfig) types.Capabilities caps.SetBlockDeviceSupport() caps.SetBlockDeviceHotplugSupport() + caps.SetNetworkDeviceHotplugSupported() return caps } diff --git a/src/runtime/virtcontainers/acrn_arch_base_test.go b/src/runtime/virtcontainers/acrn_arch_base_test.go index c34974e698..39b3ba98f9 100644 --- a/src/runtime/virtcontainers/acrn_arch_base_test.go +++ b/src/runtime/virtcontainers/acrn_arch_base_test.go @@ -89,6 +89,7 @@ func TestAcrnArchBaseCapabilities(t *testing.T) { assert.True(c.IsBlockDeviceSupported()) assert.True(c.IsBlockDeviceHotplugSupported()) assert.False(c.IsFsSharingSupported()) + assert.True(c.IsNetworkDeviceHotplugSupported()) } func TestAcrnArchBaseMemoryTopology(t *testing.T) { diff --git a/src/runtime/virtcontainers/acrn_test.go b/src/runtime/virtcontainers/acrn_test.go index 9ce189812c..8c7e646c13 100644 --- a/src/runtime/virtcontainers/acrn_test.go +++ b/src/runtime/virtcontainers/acrn_test.go @@ -82,6 +82,7 @@ func TestAcrnCapabilities(t *testing.T) { caps := a.Capabilities(a.ctx) assert.True(caps.IsBlockDeviceSupported()) assert.True(caps.IsBlockDeviceHotplugSupported()) + assert.True(caps.IsNetworkDeviceHotplugSupported()) } func testAcrnAddDevice(t *testing.T, devInfo interface{}, devType DeviceType, expected []Device) { diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 10a1fc7ab9..bac9e56ad8 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1210,6 +1210,7 @@ func (clh *cloudHypervisor) Capabilities(ctx context.Context) types.Capabilities caps.SetFsSharingSupport() } caps.SetBlockDeviceHotplugSupport() + caps.SetNetworkDeviceHotplugSupported() return caps } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index d617ab4e1c..e456f1dcf7 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -752,4 +752,7 @@ func TestClhCapabilities(t *testing.T) { c = clh.Capabilities(ctx) assert.False(c.IsFsSharingSupported()) + + assert.True(c.IsNetworkDeviceHotplugSupported()) + assert.True(c.IsBlockDeviceHotplugSupported()) } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 64a0e0915c..96aa689463 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -162,6 +162,7 @@ func (q *qemuAmd64) capabilities(hConfig HypervisorConfig) types.Capabilities { if q.qemuMachine.Type == QemuQ35 || q.qemuMachine.Type == QemuVirt { caps.SetBlockDeviceHotplugSupport() + caps.SetNetworkDeviceHotplugSupported() } caps.SetMultiQueueSupport() diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 82005b7459..1425cb38cf 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -47,10 +47,12 @@ func TestQemuAmd64Capabilities(t *testing.T) { amd64 := newTestQemu(assert, QemuQ35) caps := amd64.capabilities(config) assert.True(caps.IsBlockDeviceHotplugSupported()) + assert.True(caps.IsNetworkDeviceHotplugSupported()) amd64 = newTestQemu(assert, QemuMicrovm) caps = amd64.capabilities(config) assert.False(caps.IsBlockDeviceHotplugSupported()) + assert.False(caps.IsNetworkDeviceHotplugSupported()) } func TestQemuAmd64Bridges(t *testing.T) { diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 0dc9f46cde..93317b16f4 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -300,6 +300,7 @@ func (q *qemuArchBase) capabilities(hConfig HypervisorConfig) types.Capabilities var caps types.Capabilities caps.SetBlockDeviceHotplugSupport() caps.SetMultiQueueSupport() + caps.SetNetworkDeviceHotplugSupported() if hConfig.SharedFS != config.NoSharedFS { caps.SetFsSharingSupport() } diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index 75d7de0295..3090e765b4 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -123,6 +123,7 @@ func TestQemuArchBaseCapabilities(t *testing.T) { c := qemuArchBase.capabilities(hConfig) assert.True(c.IsBlockDeviceHotplugSupported()) assert.True(c.IsFsSharingSupported()) + assert.True(c.IsNetworkDeviceHotplugSupported()) hConfig.SharedFS = config.NoSharedFS c = qemuArchBase.capabilities(hConfig) diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index 7d71d72ba5..015d1758c6 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -104,6 +104,7 @@ func (q *qemuPPC64le) capabilities(hConfig HypervisorConfig) types.Capabilities // pseries machine type supports hotplugging drives if q.qemuMachine.Type == QemuPseries { caps.SetBlockDeviceHotplugSupport() + caps.SetNetworkDeviceHotplugSupported() } caps.SetMultiQueueSupport() diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 418075e26b..b47d234909 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -475,6 +475,7 @@ func TestQemuCapabilities(t *testing.T) { caps := q.Capabilities(q.ctx) assert.True(caps.IsBlockDeviceHotplugSupported()) + assert.True(caps.IsNetworkDeviceHotplugSupported()) } func TestQemuQemuPath(t *testing.T) { diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index aba798a905..1d3f0a4fbb 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -1310,6 +1310,23 @@ func (s *Sandbox) cleanSwap(ctx context.Context) { } } +func (s *Sandbox) runPrestartHooks(ctx context.Context, prestartHookFunc func(context.Context) error) error { + hid, err := s.GetHypervisorPid() + if err != nil { + s.Logger().Errorf("fail to get hypervisor pid for sandbox %s", s.id) + return err + } + s.Logger().Infof("sandbox %s hypervisor pid is %v", s.id, hid) + ctx = context.WithValue(ctx, HypervisorPidKey{}, hid) + + if err := prestartHookFunc(ctx); err != nil { + s.Logger().Errorf("fail to run prestartHook for sandbox %s: %s", s.id, err) + return err + } + + return nil +} + // startVM starts the VM. func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Context) error) (err error) { span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) @@ -1332,6 +1349,17 @@ func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Con } }() + caps := s.hypervisor.Capabilities(ctx) + // If the hypervisor does not support device hotplug, run prestart hooks + // before spawning the VM so that it is possible to let the hooks set up + // netns and thus network devices are set up statically. + if !caps.IsNetworkDeviceHotplugSupported() && prestartHookFunc != nil { + err = s.runPrestartHooks(ctx, prestartHookFunc) + if err != nil { + return err + } + } + if err := s.network.Run(ctx, func() error { if s.factory != nil { vm, err := s.factory.GetVM(ctx, VMConfig{ @@ -1351,24 +1379,22 @@ func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Con return err } - if prestartHookFunc != nil { - hid, err := s.GetHypervisorPid() + if caps.IsNetworkDeviceHotplugSupported() && prestartHookFunc != nil { + err = s.runPrestartHooks(ctx, prestartHookFunc) if err != nil { return err } - s.Logger().Infof("hypervisor pid is %v", hid) - ctx = context.WithValue(ctx, HypervisorPidKey{}, hid) - - if err := prestartHookFunc(ctx); err != nil { - return err - } } - // 1. Do not scan the netns if we want no network for the vmm. - // 2. In case of vm factory, scan the netns to hotplug interfaces after vm is started. - // 3. In case of prestartHookFunc, network config might have been changed. We need to + // 1. Do not scan the netns if we want no network for the vmm + // 2. Do not scan the netns if the vmm does not support device hotplug, in which case + // the network is already set up statically + // 3. In case of vm factory, scan the netns to hotplug interfaces after vm is started. + // 4. In case of prestartHookFunc, network config might have been changed. We need to // rescan and handle the change. - if !s.config.NetworkConfig.DisableNewNetwork && (s.factory != nil || prestartHookFunc != nil) { + if !s.config.NetworkConfig.DisableNewNetwork && + caps.IsNetworkDeviceHotplugSupported() && + (s.factory != nil || prestartHookFunc != nil) { if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil { return err } diff --git a/src/runtime/virtcontainers/types/capabilities.go b/src/runtime/virtcontainers/types/capabilities.go index c035444b68..71c7f2d8f5 100644 --- a/src/runtime/virtcontainers/types/capabilities.go +++ b/src/runtime/virtcontainers/types/capabilities.go @@ -10,6 +10,7 @@ const ( blockDeviceHotplugSupport multiQueueSupport fsSharingSupported + networkDeviceHotplugSupport ) // Capabilities describe a virtcontainers hypervisor capabilities @@ -57,3 +58,13 @@ func (caps *Capabilities) IsFsSharingSupported() bool { func (caps *Capabilities) SetFsSharingSupport() { caps.flags |= fsSharingSupported } + +// IsDeviceHotplugSupported tells if an hypervisor supports device hotplug. +func (caps *Capabilities) IsNetworkDeviceHotplugSupported() bool { + return caps.flags&networkDeviceHotplugSupport != 0 +} + +// SetDeviceHotplugSupported sets the host filesystem sharing capability to true. +func (caps *Capabilities) SetNetworkDeviceHotplugSupported() { + caps.flags |= networkDeviceHotplugSupport +} From 21204caf20a51e514e60586c705d8b7b836b118c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 22 Aug 2023 11:17:31 +0000 Subject: [PATCH 2/3] runtime: fail early when starting docker container with FC FC does not support network device hotplug. Let's add a check to fail early when starting containers created by docker. Signed-off-by: Peng Tao --- src/runtime/virtcontainers/sandbox.go | 11 +++++++++ src/runtime/virtcontainers/utils/utils.go | 20 ++++++++++++++++ .../virtcontainers/utils/utils_test.go | 23 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 1d3f0a4fbb..62d03532ff 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -955,6 +955,17 @@ func (s *Sandbox) createNetwork(ctx context.Context) error { return nil } + // docker container needs the hypervisor process ID to find out the container netns, + // which means that the hypervisor has to support network device hotplug so that docker + // can use the prestart hooks to set up container netns. + caps := s.hypervisor.Capabilities(ctx) + if !caps.IsNetworkDeviceHotplugSupported() { + spec := s.GetPatchedOCISpec() + if utils.IsDockerContainer(spec) { + return errors.New("docker container needs network device hotplug but the configured hypervisor does not support it") + } + } + span, ctx := katatrace.Trace(ctx, s.Logger(), "createNetwork", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) defer span.End() katatrace.AddTags(span, "network", s.network, "NetworkConfig", s.config.NetworkConfig) diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index bac62e8cfd..735cf3a2f5 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -12,9 +12,11 @@ import ( "os" "os/exec" "path/filepath" + "strings" "syscall" "time" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -494,3 +496,21 @@ func RevertBytes(num uint64) uint64 { } return 1024*RevertBytes(a) + b } + +// IsDockerContainer returns if the container is managed by docker +// This is done by checking the prestart hook for `libnetwork` arguments. +func IsDockerContainer(spec *specs.Spec) bool { + if spec == nil || spec.Hooks == nil { + return false + } + + for _, hook := range spec.Hooks.Prestart { + for _, arg := range hook.Args { + if strings.HasPrefix(arg, "libnetwork") { + return true + } + } + } + + return false +} diff --git a/src/runtime/virtcontainers/utils/utils_test.go b/src/runtime/virtcontainers/utils/utils_test.go index 8f3d0eed26..bae9af8dbd 100644 --- a/src/runtime/virtcontainers/utils/utils_test.go +++ b/src/runtime/virtcontainers/utils/utils_test.go @@ -16,6 +16,7 @@ import ( "syscall" "testing" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" ) @@ -580,3 +581,25 @@ func TestRevertBytes(t *testing.T) { num := RevertBytes(testNum) assert.Equal(expectedNum, num) } + +func TestIsDockerContainer(t *testing.T) { + assert := assert.New(t) + + ociSpec := &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Args: []string{ + "haha", + }, + }, + }, + }, + } + assert.False(IsDockerContainer(ociSpec)) + + ociSpec.Hooks.Prestart = append(ociSpec.Hooks.Prestart, specs.Hook{ + Args: []string{"libnetwork-xxx"}, + }) + assert.True(IsDockerContainer(ociSpec)) +} From 2e4c874726a9e10b53fcae52bca1a45bc642f689 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 30 Aug 2023 03:01:22 +0000 Subject: [PATCH 3/3] runtime/vc: runPrestartHooks should ignore GetHypervisorPid failure If we are running FC hypervisor, it is not started when prestart hooks are executed. So we should just ignore such error and just go ahead and run the hooks. Signed-off-by: Peng Tao --- src/runtime/virtcontainers/sandbox.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 62d03532ff..9d46f66cdf 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -1322,13 +1322,12 @@ func (s *Sandbox) cleanSwap(ctx context.Context) { } func (s *Sandbox) runPrestartHooks(ctx context.Context, prestartHookFunc func(context.Context) error) error { - hid, err := s.GetHypervisorPid() - if err != nil { - s.Logger().Errorf("fail to get hypervisor pid for sandbox %s", s.id) - return err + hid, _ := s.GetHypervisorPid() + // Ignore errors here as hypervisor might not have been started yet, likely in FC case. + if hid > 0 { + s.Logger().Infof("sandbox %s hypervisor pid is %v", s.id, hid) + ctx = context.WithValue(ctx, HypervisorPidKey{}, hid) } - s.Logger().Infof("sandbox %s hypervisor pid is %v", s.id, hid) - ctx = context.WithValue(ctx, HypervisorPidKey{}, hid) if err := prestartHookFunc(ctx); err != nil { s.Logger().Errorf("fail to run prestartHook for sandbox %s: %s", s.id, err)