From 2e075383340ff4acca80a63bf66ed549562b6c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 11 May 2022 09:21:04 +0200 Subject: [PATCH 1/7] clh: Expose VmAddNetPut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VmAddNetPut is the API provided by the Cloud Hypervisor client (auto generated) code to hotplug a new network device to the VM. Let's expose it now as it'll be used as part this series, mostly to guide the reviewer through the process of what we have to do, as later on, spoiler alert, it'll end up being removed. Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 6 ++++++ src/runtime/virtcontainers/clh_test.go | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index a47c688f4a..6a7cf61224 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -99,6 +99,8 @@ type clhClient interface { VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error) // Remove a device from the VM VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) + // Add a new net device to the VM + VmAddNetPut(ctx context.Context, netConfig chclient.NetConfig) (chclient.PciDeviceInfo, *http.Response, error) } type clhClientApi struct { @@ -142,6 +144,10 @@ func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chc return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute() } +func (c *clhClientApi) VmAddNetPut(ctx context.Context, netConfig chclient.NetConfig) (chclient.PciDeviceInfo, *http.Response, error) { + return c.ApiInternal.VmAddNetPut(ctx).NetConfig(netConfig).Execute() +} + // // Cloud hypervisor state // diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 79e35210b1..23c54b3423 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -117,6 +117,11 @@ func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice ch return nil, nil } +//nolint:golint +func (c *clhClientMock) VmAddNetPut(ctx context.Context, netConfig chclient.NetConfig) (chclient.PciDeviceInfo, *http.Response, error) { + return chclient.PciDeviceInfo{}, nil, nil +} + func TestCloudHypervisorAddVSock(t *testing.T) { assert := assert.New(t) clh := cloudHypervisor{} From 01fe09a4ee2d6a71e107f5c482b7c0a77d350f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 11 May 2022 09:33:51 +0200 Subject: [PATCH 2/7] clh: Hotplug the network devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of creating the VM with the network device already plugged in, let's actually add the network device *after* the VM is created, but *before* the Vm is actually booted. Although it looks like it doesn't make any functional difference between what's done in the past and what this commit introduces, this will be used to workaround a limitation on OpenAPI when it comes to passing down the network device's file descriptor to Cloud Hypervisor, so Cloud Hypervisor can use it instead of opening the device by its name on the VMM side. Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 31 +++++++++++++++++++++++--- src/runtime/virtcontainers/clh_test.go | 29 +++++++++++++++++------- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 6a7cf61224..1016bcb8e0 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -148,6 +148,21 @@ func (c *clhClientApi) VmAddNetPut(ctx context.Context, netConfig chclient.NetCo return c.ApiInternal.VmAddNetPut(ctx).NetConfig(netConfig).Execute() } +// This is done in order to be able to override such a function as part of +// our unit tests, as when testing bootVM we're on a mocked scenario already. +var vmAddNetPutRequest = func(clh *cloudHypervisor, ctx context.Context) error { + cl := clh.client() + + for _, netDevice := range *clh.netDevices { + _, _, err := cl.VmAddNetPut(ctx, netDevice) + if err != nil { + return err + } + } + + return nil +} + // // Cloud hypervisor state // @@ -170,6 +185,7 @@ type cloudHypervisor struct { APIClient clhClient ctx context.Context id string + netDevices *[]chclient.NetConfig devicesIds map[string]string vmconfig chclient.VmConfig state CloudHypervisorState @@ -1267,6 +1283,10 @@ func openAPIClientError(err error) error { return fmt.Errorf("error: %v reason: %s", err, reason) } +func (clh *cloudHypervisor) vmAddNetPut(ctx context.Context) error { + return vmAddNetPutRequest(clh, ctx) +} + func (clh *cloudHypervisor) bootVM(ctx context.Context) error { cl := clh.client() @@ -1294,6 +1314,11 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error { return fmt.Errorf("VM state is not 'Created' after 'CreateVM'") } + err = clh.vmAddNetPut(ctx) + if err != nil { + return err + } + clh.Logger().Debug("Booting VM") _, err = cl.BootVM(ctx) if err != nil { @@ -1397,10 +1422,10 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { net.SetRateLimiterConfig(*netRateLimiterConfig) } - if clh.vmconfig.Net != nil { - *clh.vmconfig.Net = append(*clh.vmconfig.Net, *net) + if clh.netDevices != nil { + *clh.netDevices = append(*clh.netDevices, *net) } else { - clh.vmconfig.Net = &[]chclient.NetConfig{*net} + clh.netDevices = &[]chclient.NetConfig{*net} } return nil diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 23c54b3423..b1108ab6c9 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -148,19 +148,19 @@ func TestCloudHypervisorAddNetCheckNetConfigListValues(t *testing.T) { err := clh.addNet(e) assert.Nil(err) - assert.Equal(len(*clh.vmconfig.Net), 1) + assert.Equal(len(*clh.netDevices), 1) if err == nil { - assert.Equal(*(*clh.vmconfig.Net)[0].Mac, macTest) - assert.Equal(*(*clh.vmconfig.Net)[0].Tap, tapPath) + assert.Equal(*(*clh.netDevices)[0].Mac, macTest) + assert.Equal(*(*clh.netDevices)[0].Tap, tapPath) } err = clh.addNet(e) assert.Nil(err) - assert.Equal(len(*clh.vmconfig.Net), 2) + assert.Equal(len(*clh.netDevices), 2) if err == nil { - assert.Equal(*(*clh.vmconfig.Net)[1].Mac, macTest) - assert.Equal(*(*clh.vmconfig.Net)[1].Tap, tapPath) + assert.Equal(*(*clh.netDevices)[1].Mac, macTest) + assert.Equal(*(*clh.netDevices)[1].Tap, tapPath) } } @@ -194,7 +194,7 @@ func TestCloudHypervisorAddNetCheckEnpointTypes(t *testing.T) { t.Errorf("cloudHypervisor.addNet() error = %v, wantErr %v", err, tt.wantErr) } else if err == nil { - assert.Equal(*(*clh.vmconfig.Net)[0].Tap, tapPath) + assert.Equal(*(*clh.netDevices)[0].Tap, tapPath) } }) } @@ -350,7 +350,7 @@ func TestCloudHypervisorNetRateLimiter(t *testing.T) { if err := clh.addNet(validVeth); err != nil { t.Errorf("cloudHypervisor.addNet() error = %v", err) } else { - netConfig := (*clh.vmconfig.Net)[0] + netConfig := (*clh.netDevices)[0] assert.Equal(netConfig.HasRateLimiterConfig(), tt.expectsRateLimiter) if tt.expectsRateLimiter { @@ -378,6 +378,13 @@ func TestCloudHypervisorNetRateLimiter(t *testing.T) { func TestCloudHypervisorBootVM(t *testing.T) { clh := &cloudHypervisor{} clh.APIClient = &clhClientMock{} + + savedVmAddNetPutRequestFunc := vmAddNetPutRequest + vmAddNetPutRequest = func(clh *cloudHypervisor, ctx context.Context) error { return nil } + defer func() { + vmAddNetPutRequest = savedVmAddNetPutRequestFunc + }() + var ctx context.Context if err := clh.bootVM(ctx); err != nil { t.Errorf("cloudHypervisor.bootVM() error = %v", err) @@ -491,6 +498,12 @@ func TestCloudHypervisorStartSandbox(t *testing.T) { store, err := persist.GetDriver() assert.NoError(err) + savedVmAddNetPutRequestFunc := vmAddNetPutRequest + vmAddNetPutRequest = func(clh *cloudHypervisor, ctx context.Context) error { return nil } + defer func() { + vmAddNetPutRequest = savedVmAddNetPutRequestFunc + }() + clhConfig.VMStorePath = store.RunVMStoragePath() clhConfig.RunStorePath = store.RunStoragePath() From 55ed32e9247d33dc55a327993b77c548eb7d2ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 11 May 2022 10:37:58 +0200 Subject: [PATCH 3/7] clh: Take care of the VmAdNetdPut request ourselves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knowing that VmAddNetPut works as expected, let's switch to manually building the request and writing it to the appropriate socket. By doing this it gives us more flexibility to, later on, pass the file descriptor of the tuntap device to Cloud Hypervisor, as openAPI doesn't support such operation (it has no notion of SCM Rights). Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 73 +++++++++++++++++++++----- src/runtime/virtcontainers/clh_test.go | 9 +--- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 1016bcb8e0..5d290ff3bc 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -9,11 +9,15 @@ package virtcontainers import ( + "bufio" + "bytes" "context" "encoding/json" "fmt" + "io/ioutil" "net" "net/http" + "net/http/httputil" "os" "os/exec" "path/filepath" @@ -99,8 +103,6 @@ type clhClient interface { VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error) // Remove a device from the VM VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) - // Add a new net device to the VM - VmAddNetPut(ctx context.Context, netConfig chclient.NetConfig) (chclient.PciDeviceInfo, *http.Response, error) } type clhClientApi struct { @@ -144,20 +146,67 @@ func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chc return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute() } -func (c *clhClientApi) VmAddNetPut(ctx context.Context, netConfig chclient.NetConfig) (chclient.PciDeviceInfo, *http.Response, error) { - return c.ApiInternal.VmAddNetPut(ctx).NetConfig(netConfig).Execute() -} - // This is done in order to be able to override such a function as part of // our unit tests, as when testing bootVM we're on a mocked scenario already. -var vmAddNetPutRequest = func(clh *cloudHypervisor, ctx context.Context) error { - cl := clh.client() +var vmAddNetPutRequest = func(clh *cloudHypervisor) error { + addr, err := net.ResolveUnixAddr("unix", clh.state.apiSocket) + if err != nil { + return err + } + + conn, err := net.DialUnix("unix", nil, addr) + if err != nil { + return err + } + defer conn.Close() for _, netDevice := range *clh.netDevices { - _, _, err := cl.VmAddNetPut(ctx, netDevice) + netDeviceAsJson, err := json.Marshal(netDevice) if err != nil { return err } + netDeviceAsIoReader := bytes.NewBuffer(netDeviceAsJson) + + req, err := http.NewRequest(http.MethodPut, "http://localhost/api/v1/vm.add-net", netDeviceAsIoReader) + if err != nil { + return err + } + + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Content-Length", strconv.Itoa(int(netDeviceAsIoReader.Len()))) + + payload, err := httputil.DumpRequest(req, true) + if err != nil { + return err + } + + payloadn, err := conn.Write([]byte(payload)) + if err != nil { + return err + } + if payloadn != len(payload) { + return fmt.Errorf("Failed to send all the request to Cloud Hypervisor. %d bytes expect to send, but only %d sent", len(payload), payloadn) + } + + reader := bufio.NewReader(conn) + resp, err := http.ReadResponse(reader, req) + if err != nil { + return err + } + + respBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + + resp.Body.Close() + resp.Body = ioutil.NopCloser(bytes.NewBuffer(respBody)) + + if resp.StatusCode != 204 { + clh.Logger().Errorf("vmAddNetPut failed with error '%d'. Response: %+v", resp.StatusCode, resp) + return fmt.Errorf("Failed to add the network device '%+v' to Cloud Hypervisor: %v", netDevice, resp.StatusCode) + } } return nil @@ -1283,8 +1332,8 @@ func openAPIClientError(err error) error { return fmt.Errorf("error: %v reason: %s", err, reason) } -func (clh *cloudHypervisor) vmAddNetPut(ctx context.Context) error { - return vmAddNetPutRequest(clh, ctx) +func (clh *cloudHypervisor) vmAddNetPut() error { + return vmAddNetPutRequest(clh) } func (clh *cloudHypervisor) bootVM(ctx context.Context) error { @@ -1314,7 +1363,7 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error { return fmt.Errorf("VM state is not 'Created' after 'CreateVM'") } - err = clh.vmAddNetPut(ctx) + err = clh.vmAddNetPut() if err != nil { return err } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index b1108ab6c9..31ae67c1a9 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -117,11 +117,6 @@ func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice ch return nil, nil } -//nolint:golint -func (c *clhClientMock) VmAddNetPut(ctx context.Context, netConfig chclient.NetConfig) (chclient.PciDeviceInfo, *http.Response, error) { - return chclient.PciDeviceInfo{}, nil, nil -} - func TestCloudHypervisorAddVSock(t *testing.T) { assert := assert.New(t) clh := cloudHypervisor{} @@ -380,7 +375,7 @@ func TestCloudHypervisorBootVM(t *testing.T) { clh.APIClient = &clhClientMock{} savedVmAddNetPutRequestFunc := vmAddNetPutRequest - vmAddNetPutRequest = func(clh *cloudHypervisor, ctx context.Context) error { return nil } + vmAddNetPutRequest = func(clh *cloudHypervisor) error { return nil } defer func() { vmAddNetPutRequest = savedVmAddNetPutRequestFunc }() @@ -499,7 +494,7 @@ func TestCloudHypervisorStartSandbox(t *testing.T) { assert.NoError(err) savedVmAddNetPutRequestFunc := vmAddNetPutRequest - vmAddNetPutRequest = func(clh *cloudHypervisor, ctx context.Context) error { return nil } + vmAddNetPutRequest = func(clh *cloudHypervisor) error { return nil } defer func() { vmAddNetPutRequest = savedVmAddNetPutRequestFunc }() From bf3ddc125d57622afa371b3242b921a37db2268d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 19 May 2022 09:32:27 +0200 Subject: [PATCH 4/7] clh: Pass the tuntap fds down to Cloud Hypervisor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is basically a no-op right now, as: * netPair.TapInterface.VMFds is nil * the tap name is still passed to Cloud Hypervisor, which is the Cloud Hypervisor's first choice when opening a tap device. In the very near future we'll stop passing the tap name to Cloud Hypervisor, and start passing the file descriptors of the opened tap instead. Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 35 ++++++++++++++++---------- src/runtime/virtcontainers/clh_test.go | 3 +++ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 5d290ff3bc..df493f4b3d 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -181,12 +181,18 @@ var vmAddNetPutRequest = func(clh *cloudHypervisor) error { return err } - payloadn, err := conn.Write([]byte(payload)) + files := clh.netDevicesFiles[*netDevice.Mac] + var fds []int + for _, f := range files { + fds = append(fds, int(f.Fd())) + } + oob := syscall.UnixRights(fds...) + payloadn, oobn, err := conn.WriteMsgUnix([]byte(payload), oob, nil) if err != nil { return err } - if payloadn != len(payload) { - return fmt.Errorf("Failed to send all the request to Cloud Hypervisor. %d bytes expect to send, but only %d sent", len(payload), payloadn) + if payloadn != len(payload) || oobn != len(oob) { + return fmt.Errorf("Failed to send all the request to Cloud Hypervisor. %d bytes expect to send as payload, %d bytes expect to send as oob date, but only %d sent as payload, and %d sent as oob", len(payload), len(oob), payloadn, oobn) } reader := bufio.NewReader(conn) @@ -229,16 +235,17 @@ func (s *CloudHypervisorState) reset() { } type cloudHypervisor struct { - console console.Console - virtiofsDaemon VirtiofsDaemon - APIClient clhClient - ctx context.Context - id string - netDevices *[]chclient.NetConfig - devicesIds map[string]string - vmconfig chclient.VmConfig - state CloudHypervisorState - config HypervisorConfig + console console.Console + virtiofsDaemon VirtiofsDaemon + APIClient clhClient + ctx context.Context + id string + netDevices *[]chclient.NetConfig + devicesIds map[string]string + netDevicesFiles map[string][]*os.File + vmconfig chclient.VmConfig + state CloudHypervisorState + config HypervisorConfig } var clhKernelParams = []Param{ @@ -430,6 +437,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.id = id clh.state.state = clhNotReady clh.devicesIds = make(map[string]string) + clh.netDevicesFiles = make(map[string][]*os.File) clh.Logger().WithField("function", "CreateVM").Info("creating Sandbox") @@ -1456,6 +1464,7 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { if tapPath == "" { return errors.New("TAP path in network pair is empty") } + clh.netDevicesFiles[mac] = netPair.TapInterface.VMFds clh.Logger().WithFields(log.Fields{ "mac": mac, diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 31ae67c1a9..1343cfac78 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -135,6 +135,7 @@ func TestCloudHypervisorAddNetCheckNetConfigListValues(t *testing.T) { assert := assert.New(t) clh := cloudHypervisor{} + clh.netDevicesFiles = make(map[string][]*os.File) e := &VethEndpoint{} e.NetPair.TAPIface.HardAddr = macTest @@ -185,6 +186,7 @@ func TestCloudHypervisorAddNetCheckEnpointTypes(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { clh := &cloudHypervisor{} + clh.netDevicesFiles = make(map[string][]*os.File) if err := clh.addNet(tt.args.e); (err != nil) != tt.wantErr { t.Errorf("cloudHypervisor.addNet() error = %v, wantErr %v", err, tt.wantErr) @@ -339,6 +341,7 @@ func TestCloudHypervisorNetRateLimiter(t *testing.T) { clhConfig.NetRateLimiterOpsOneTimeBurst = tt.args.opsOneTimeBurst clh := &cloudHypervisor{} + clh.netDevicesFiles = make(map[string][]*os.File) clh.config = clhConfig clh.APIClient = &clhClientMock{} From 93b61e0f0753d6fd89698d1aaf4948b326140eff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sat, 21 May 2022 09:17:34 +0200 Subject: [PATCH 5/7] network: Add FFI_NO_PI to the netlink flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding FFI_NO_PI to the netlink flags causes no harm to the supported and tested hypervisors as when opening the device by its name Cloud Hypervisor[0], Firecracker[1], and QEMU[2] do set the flag already. However, when receiving the file descriptor of an opened tutap device Cloud Hypervisor is not able to set the flag, leaving the guest without connectivity. To avoid such an issue, let's simply add the FFI_NO_PI flag to the netlink flags and ensure, from our side, that the VMMs don't have to set it on their side when dealing with an already opened tuntap device. Note that there's a PR opened[3] just for testing that this change doesn't cause any breakage. [0]: https://github.com/cloud-hypervisor/cloud-hypervisor/blob/e52175c2ab6a7e000b2f8f4aafa73ce4e0582a6e/net_util/src/tap.rs#L129 [1]: https://github.com/firecracker-microvm/firecracker/blob/b6d6f712131e4e746f603c4562de7fea8a318a02/src/devices/src/virtio/net/tap.rs#L126 [2]: https://github.com/qemu/qemu/blob/3757b0d08b399c609954cf57f273b1167e5d7a8d/net/tap-linux.c#L54 [3]: https://github.com/kata-containers/kata-containers/pull/4292 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/network_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go index c4f2380e57..f8456a4976 100644 --- a/src/runtime/virtcontainers/network_linux.go +++ b/src/runtime/virtcontainers/network_linux.go @@ -408,7 +408,7 @@ func createLink(netHandle *netlink.Handle, name string, expectedLink netlink.Lin switch expectedLink.Type() { case (&netlink.Tuntap{}).Type(): - flags := netlink.TUNTAP_VNET_HDR + flags := netlink.TUNTAP_VNET_HDR | netlink.TUNTAP_NO_PI if queues > 0 { flags |= netlink.TUNTAP_MULTI_QUEUE_DEFAULTS } From 0b75522e1fc8e7ef12bd58caf7b387f7f9ae40cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 19 May 2022 09:34:08 +0200 Subject: [PATCH 6/7] network: Set queues to 1 to ensure we get the network fds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to have the file descriptors of the opened tuntap device to pass them down to the VMMs, so the VMMs don't have to explicitly open a new tuntap device themselves, as the `container_kvm_t` label does not allow such a thing. With this change we ensure that what's currently done when using QEMU as the hypervisor, can be easily replicated with other VMMs, even if they don't support multiqueue. As a side effect of this, we need to close the received file descriptors in the code of the VMMs which are not going to use them. Fixes: #3533 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 9 ++--- src/runtime/virtcontainers/clh_test.go | 44 +++++++++++++++------ src/runtime/virtcontainers/fc.go | 6 +++ src/runtime/virtcontainers/network_linux.go | 10 +++++ 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index df493f4b3d..23a16f46f1 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1457,25 +1457,22 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { mac := e.HardwareAddr() netPair := e.NetworkPair() if netPair == nil { - return errors.New("net Pair to be added is nil, needed to get TAP path") + return errors.New("net Pair to be added is nil, needed to get TAP file descriptors") } - tapPath := netPair.TapInterface.TAPIface.Name - if tapPath == "" { - return errors.New("TAP path in network pair is empty") + if len(netPair.TapInterface.VMFds) == 0 { + return errors.New("The file descriptors for the network pair are not present") } clh.netDevicesFiles[mac] = netPair.TapInterface.VMFds clh.Logger().WithFields(log.Fields{ "mac": mac, - "tap": tapPath, }).Info("Adding Net") netRateLimiterConfig := clh.getNetRateLimiterConfig() net := chclient.NewNetConfig() net.Mac = &mac - net.Tap = &tapPath if netRateLimiterConfig != nil { net.SetRateLimiterConfig(*netRateLimiterConfig) } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 1343cfac78..302d945f00 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -10,6 +10,7 @@ package virtcontainers import ( "context" + "io/ioutil" "net/http" "os" "path/filepath" @@ -129,25 +130,30 @@ func TestCloudHypervisorAddVSock(t *testing.T) { // Check addNet appends to the network config list new configurations. // Check that the elements in the list has the correct values func TestCloudHypervisorAddNetCheckNetConfigListValues(t *testing.T) { - macTest := "00:00:00:00:00" - tapPath := "/path/to/tap" - assert := assert.New(t) + macTest := "00:00:00:00:00" + + file, err := ioutil.TempFile("", "netFd") + assert.Nil(err) + defer os.Remove(file.Name()) + + vmFds := make([]*os.File, 1) + vmFds = append(vmFds, file) + clh := cloudHypervisor{} clh.netDevicesFiles = make(map[string][]*os.File) e := &VethEndpoint{} e.NetPair.TAPIface.HardAddr = macTest - e.NetPair.TapInterface.TAPIface.Name = tapPath + e.NetPair.TapInterface.VMFds = vmFds - err := clh.addNet(e) + err = clh.addNet(e) assert.Nil(err) assert.Equal(len(*clh.netDevices), 1) if err == nil { assert.Equal(*(*clh.netDevices)[0].Mac, macTest) - assert.Equal(*(*clh.netDevices)[0].Tap, tapPath) } err = clh.addNet(e) @@ -156,7 +162,6 @@ func TestCloudHypervisorAddNetCheckNetConfigListValues(t *testing.T) { assert.Equal(len(*clh.netDevices), 2) if err == nil { assert.Equal(*(*clh.netDevices)[1].Mac, macTest) - assert.Equal(*(*clh.netDevices)[1].Tap, tapPath) } } @@ -165,10 +170,18 @@ func TestCloudHypervisorAddNetCheckNetConfigListValues(t *testing.T) { func TestCloudHypervisorAddNetCheckEnpointTypes(t *testing.T) { assert := assert.New(t) - tapPath := "/path/to/tap" + macTest := "00:00:00:00:00" + + file, err := ioutil.TempFile("", "netFd") + assert.Nil(err) + defer os.Remove(file.Name()) + + vmFds := make([]*os.File, 1) + vmFds = append(vmFds, file) validVeth := &VethEndpoint{} - validVeth.NetPair.TapInterface.TAPIface.Name = tapPath + validVeth.NetPair.TAPIface.HardAddr = macTest + validVeth.NetPair.TapInterface.VMFds = vmFds type args struct { e Endpoint @@ -189,9 +202,9 @@ func TestCloudHypervisorAddNetCheckEnpointTypes(t *testing.T) { clh.netDevicesFiles = make(map[string][]*os.File) if err := clh.addNet(tt.args.e); (err != nil) != tt.wantErr { t.Errorf("cloudHypervisor.addNet() error = %v, wantErr %v", err, tt.wantErr) - } else if err == nil { - assert.Equal(*(*clh.netDevices)[0].Tap, tapPath) + files := clh.netDevicesFiles[macTest] + assert.Equal(files, vmFds) } }) } @@ -201,10 +214,15 @@ func TestCloudHypervisorAddNetCheckEnpointTypes(t *testing.T) { func TestCloudHypervisorNetRateLimiter(t *testing.T) { assert := assert.New(t) - tapPath := "/path/to/tap" + file, err := ioutil.TempFile("", "netFd") + assert.Nil(err) + defer os.Remove(file.Name()) + + vmFds := make([]*os.File, 1) + vmFds = append(vmFds, file) validVeth := &VethEndpoint{} - validVeth.NetPair.TapInterface.TAPIface.Name = tapPath + validVeth.NetPair.TapInterface.VMFds = vmFds type args struct { bwMaxRate int64 diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 3a89c7fa44..b792c90acb 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -927,6 +927,12 @@ func (fc *firecracker) fcAddNetDevice(ctx context.Context, endpoint Endpoint) { ifaceID := endpoint.Name() + // VMFds are not used by Firecracker, as it opens the tuntap + // device by its name. Let's just close those. + for _, f := range endpoint.NetworkPair().TapInterface.VMFds { + f.Close() + } + // The implementation of rate limiter is based on TBF. // Rate Limiter defines a token bucket with a maximum capacity (size) to store tokens, and an interval for refilling purposes (refill_time). // The refill-rate is derived from size and refill_time, and it is the constant rate at which the tokens replenish. diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go index f8456a4976..f3356bb1cf 100644 --- a/src/runtime/virtcontainers/network_linux.go +++ b/src/runtime/virtcontainers/network_linux.go @@ -411,6 +411,16 @@ func createLink(netHandle *netlink.Handle, name string, expectedLink netlink.Lin flags := netlink.TUNTAP_VNET_HDR | netlink.TUNTAP_NO_PI if queues > 0 { flags |= netlink.TUNTAP_MULTI_QUEUE_DEFAULTS + } else { + // We need to enforce `queues = 1` here in case + // multi-queue is *not* supported, the reason being + // `linkModify()`, a method called by `LinkAdd()`, only + // returning the file descriptor of the opened tuntap + // device when the queues are set to *non zero*. + // + // Please, for more information, refer to: + // https://github.com/kata-containers/kata-containers/blob/e6e5d2593ac319329269d7b58c30f99ba7b2bf5a/src/runtime/vendor/github.com/vishvananda/netlink/link_linux.go#L1164-L1316 + queues = 1 } newLink = &netlink.Tuntap{ LinkAttrs: netlink.LinkAttrs{Name: name}, From ac5dbd8598abd7ccae6f7c3b0085d03ddee44355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 26 May 2022 12:23:01 +0200 Subject: [PATCH 7/7] clh: Improve logging related to the net dev addition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's improve the log so we make it clear that we're only *actually* adding the net device to the Cloud Hypervisor configuration when calling our own version of VmAddNetPut(). Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 23a16f46f1..ce8f8fa577 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -161,6 +161,8 @@ var vmAddNetPutRequest = func(clh *cloudHypervisor) error { defer conn.Close() for _, netDevice := range *clh.netDevices { + clh.Logger().Infof("Adding the net device to the Cloud Hypervisor VM configuration: %+v", netDevice) + netDeviceAsJson, err := json.Marshal(netDevice) if err != nil { return err @@ -1465,10 +1467,6 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { } clh.netDevicesFiles[mac] = netPair.TapInterface.VMFds - clh.Logger().WithFields(log.Fields{ - "mac": mac, - }).Info("Adding Net") - netRateLimiterConfig := clh.getNetRateLimiterConfig() net := chclient.NewNetConfig() @@ -1483,6 +1481,8 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { clh.netDevices = &[]chclient.NetConfig{*net} } + clh.Logger().Infof("Storing the Cloud Hypervisor network configuration: %+v", net) + return nil }