From a33a22ccd1ddfb7e067f384fe33b042f98181e78 Mon Sep 17 00:00:00 2001 From: Zhongtao Hu Date: Tue, 10 Jan 2023 18:15:45 +0800 Subject: [PATCH 01/20] runtime-rs: add missing config section for share-fs add missing config sections for share-fs Fixes:#6020 Signed-off-by: Zhongtao Hu --- .../kata-types/src/config/hypervisor/mod.rs | 4 +++ src/runtime-rs/Makefile | 2 ++ .../config/configuration-dragonball.toml.in | 28 +++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index aba5c751b6..d3dff69142 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -779,6 +779,10 @@ pub struct SharedFsInfo { #[serde(default)] pub virtio_fs_cache_size: u32, + /// Default size of virtqueues + #[serde(default)] + pub virtio_fs_queue_size: u32, + /// Enable virtio-fs DAX window if true. #[serde(default)] pub virtio_fs_is_dax: bool, diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index ea97517389..ffe7a934bc 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -119,6 +119,7 @@ DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"] # if value is 0, DAX is not enabled DEFVIRTIOFSCACHESIZE ?= 0 DEFVIRTIOFSCACHE ?= auto +DEFVIRTIOFSQUEUESIZE ?= 1024 # Format example: # [\"-o\", \"arg1=xxx,arg2\", \"-o\", \"hello world\", \"--arg3=yyy\"] # @@ -256,6 +257,7 @@ USER_VARS += DEFVIRTIOFSDAEMON USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS USER_VARS += DEFVIRTIOFSCACHESIZE USER_VARS += DEFVIRTIOFSCACHE +USER_VARS += DEFVIRTIOFSQUEUESIZE USER_VARS += DEFVIRTIOFSEXTRAARGS USER_VARS += DEFENABLEANNOTATIONS USER_VARS += DEFENABLEIOTHREADS diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in index ca1db2681a..fa39d23c82 100644 --- a/src/runtime-rs/config/configuration-dragonball.toml.in +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -136,6 +136,34 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@" # of shim, does not need an external virtiofsd process. shared_fs = "@DBSHAREDFS@" +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + # Enable huge pages for VM RAM, default false # Enabling this will result in the VM memory # being allocated using huge pages. From 6199b69178e892c1e09b688d02ef3e49581c9ff8 Mon Sep 17 00:00:00 2001 From: Zhongtao Hu Date: Tue, 10 Jan 2023 20:23:49 +0800 Subject: [PATCH 02/20] runtime-rs: change cache mode use never as the cache mode if none is configured Fixes:#6020 Signed-off-by: Zhongtao Hu --- src/libs/kata-types/src/config/hypervisor/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index d3dff69142..afd3a42ea5 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -32,7 +32,7 @@ use regex::RegexSet; use super::{default, ConfigOps, ConfigPlugin, TomlConfig}; use crate::annotations::KATA_ANNO_CFG_HYPERVISOR_PREFIX; -use crate::{eother, resolve_path, validate_path}; +use crate::{eother, resolve_path, sl, validate_path}; mod dragonball; pub use self::dragonball::{DragonballConfig, HYPERVISOR_NAME_DRAGONBALL}; @@ -850,6 +850,10 @@ impl SharedFsInfo { if self.virtio_fs_cache.is_empty() { self.virtio_fs_cache = default::DEFAULT_VIRTIO_FS_CACHE_MODE.to_string(); } + if self.virtio_fs_cache == *"none" { + warn!(sl!(), "virtio-fs cache mode `none` is deprecated since Kata Containers 2.5.0 and will be removed in the future release, please use `never` instead. For more details please refer to https://github.com/kata-containers/kata-containers/issues/4234."); + self.virtio_fs_cache = default::DEFAULT_VIRTIO_FS_CACHE_MODE.to_string(); + } if self.virtio_fs_is_dax && self.virtio_fs_cache_size == 0 { self.virtio_fs_cache_size = default::DEFAULT_VIRTIO_FS_DAX_SIZE_MB; } From 9f490d16fef91c472d139ccf35ed98b7def820e1 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 16 Jan 2023 16:40:38 +0800 Subject: [PATCH 03/20] upcall: add document for upcall In order for users to get better understand of upcall features, we add this document for upcall to illustrate what is upcall and how to enable upcall. fixes: #6054 Signed-off-by: Chao Wu --- src/dragonball/README.md | 1 + .../docs/images/upcall-architecture.svg | 177 ++++++++++++++++++ src/dragonball/docs/upcall.md | 30 +++ 3 files changed, 208 insertions(+) create mode 100644 src/dragonball/docs/images/upcall-architecture.svg create mode 100644 src/dragonball/docs/upcall.md diff --git a/src/dragonball/README.md b/src/dragonball/README.md index c9d7e5119c..3fde0782e0 100644 --- a/src/dragonball/README.md +++ b/src/dragonball/README.md @@ -19,6 +19,7 @@ and configuration process. Device: [Device Document](docs/device.md) vCPU: [vCPU Document](docs/vcpu.md) API: [API Document](docs/api.md) +`Upcall`: [`Upcall` Document](docs/upcall.md) Currently, the documents are still actively adding. You could see the [official documentation](docs/) page for more details. diff --git a/src/dragonball/docs/images/upcall-architecture.svg b/src/dragonball/docs/images/upcall-architecture.svg new file mode 100644 index 0000000000..a74c37f455 --- /dev/null +++ b/src/dragonball/docs/images/upcall-architecture.svg @@ -0,0 +1,177 @@ + + + + + + + + + + + + + + + + + Canvas 1 + + + Layer 1 + + + + + + Guest User + + + + + Guest Kernel + + + + + + + + Hypervisor + + + + + + + + socket + + + + + + Device + Manager + Service + + + + + bind + + + + + listen + + + + + accept + + + + + new kthread + + + + + + virtio-vsocket + + + + + + virtio-vsocket backend + + + + + + + + + + + + + + + + + + Port + + + + + + Device + Manager + Backend + + + + + + + + + Service B + + + + + + Backend B + + + + + + + + …… + + + + + …… + + + + + Upcall Server + + + + + + + + Service handler + + + + + + Conn + + + + + + Conn + + + + + + + + + + + diff --git a/src/dragonball/docs/upcall.md b/src/dragonball/docs/upcall.md new file mode 100644 index 0000000000..292b33d270 --- /dev/null +++ b/src/dragonball/docs/upcall.md @@ -0,0 +1,30 @@ +# `Upcall` + +## What is `Upcall`? + +`Upcall` is a direct communication tool between VMM and guest developed upon `vsock`. The server side of the `upcall` is a driver in guest kernel (kernel patches are needed for this feature) and it'll start to serve the requests after the kernel starts. And the client side is in Dragonball VMM , it'll be a thread that communicates with `vsock` through `uds`. + +We want to keep the lightweight of the VM through the implementation of the `upcall`. + +![architecture overview](images/upcall-architecture.svg) +## What can `upcall` do? + +We define specific operations in the device manager service (one of the services in `upcall` we developed) to perform device hotplug / hot-unplug including vCPU hotplug, `virtio-mmio` hotplug, and memory hotplug. We have accomplished device hotplug / hot-unplug directly through `upcall` in order to avoid the virtualization of ACPI to minimize virtual machines overhead. And there could be many other uses if other services are implemented. + +## How to enable `upcall`? + +`Upcall` needs a server in the guest kernel which will be several kernel patches for the `upcall` server itself and different services registered in the `upcall` server. It's currently tested on upstream Linux kernel 5.10. + +To make it easy for users to use, we have open-source the `upcall` guest patches in [Dragonball experimental guest patches](../../../tools/packaging/kernel/patches/5.10.x/dragonball-experimental) and develop `upcall` support in [Kata guest kernel building script](../../../tools/packaging/kernel/build-kernel.sh). + +You could use following command to download the upstream kernel (currently Dragonball uses 5.10.25) and put the `upcall` patches and other Kata patches into kernel code. + +`sh build-kernel.sh -e -t dragonball -f setup` + +`-e` here means experimental, mainly because `upcall` patches are not in upstream Linux kernel. +`-t dragonball` is for specifying hypervisor type +`-f` is for generating `.config` file + +After this command, the kernel code with `upcall` and related `.config` file are all set up in the directory `kata-linux-dragonball-experimental-5.10.25-[config version]`. You can either manually compile the kernel with `make` command or following [Document for build-kernel.sh](../../../tools/packaging/kernel/README.md) to build and use this guest kernel. + +Also, a client-side is also needed in VMM. Dragonball has already open-source the way to implement `upcall` client and Dragonball compiled with `dbs-upcall` feature will enable Dragonball client side. \ No newline at end of file From a85d0e465ca2b9050952bce93a0919022510671a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julien=20Rop=C3=A9?= Date: Thu, 19 Jan 2023 11:27:51 +0100 Subject: [PATCH 04/20] versions: update cni plugins version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use cni plugins v1.2.0 to get latest fixes. Fixes: #6110 Signed-off-by: Julien Ropé --- versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.yaml b/versions.yaml index f6bd320a22..f79eff7709 100644 --- a/versions.yaml +++ b/versions.yaml @@ -193,7 +193,7 @@ externals: cni-plugins: description: "CNI network plugins" url: "https://github.com/containernetworking/plugins" - version: "v1.1.1" + version: "v1.2.0" conmon: description: "An OCI container runtime monitor" From 219bb8e7d02fdcf54853e4ce2f926d9add1f16ec Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 24 Nov 2022 18:43:40 +0100 Subject: [PATCH 05/20] govmm: Optionally start QMP with a pre-configured connection When QEMU is launched daemonized, we have the guarantee that the QMP socket is available. In order to launch a non-daemonized QEMU, the QMP connection should be created before QEMU is started in order to avoid a race. Introduce a variant of QMPStart() that can use such an existing connection. Signed-off-by: Greg Kurz --- src/runtime/pkg/govmm/qemu/qmp.go | 10 ++++++++++ src/runtime/pkg/govmm/qemu/qmp_test.go | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index 24c92b9b20..2103b7f79b 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -702,6 +702,16 @@ func QMPStart(ctx context.Context, socket string, cfg QMPConfig, disconnectedCh return nil, nil, err } + return QMPStartWithConn(ctx, conn, cfg, disconnectedCh) +} + +// Same as QMPStart but with a pre-established connection +func QMPStartWithConn(ctx context.Context, conn net.Conn, cfg QMPConfig, disconnectedCh chan struct{}) (*QMP, *QMPVersion, error) { + if conn == nil { + close(disconnectedCh) + return nil, nil, fmt.Errorf("invalid connection") + } + connectedCh := make(chan *QMPVersion) q := startQMPLoop(conn, cfg, connectedCh, disconnectedCh) diff --git a/src/runtime/pkg/govmm/qemu/qmp_test.go b/src/runtime/pkg/govmm/qemu/qmp_test.go index 3f82fa54f9..c6bee11e7d 100644 --- a/src/runtime/pkg/govmm/qemu/qmp_test.go +++ b/src/runtime/pkg/govmm/qemu/qmp_test.go @@ -273,6 +273,22 @@ func TestQMPStartBadPath(t *testing.T) { <-disconnectedCh } +// Checks that a call to QMPStartWithConn with a nil connection exits gracefully. +// +// We call QMPStartWithConn with a nil connection. +// +// An error should be returned and the disconnected channel should be closed. +func TestQMPStartWithConnNil(t *testing.T) { + cfg := QMPConfig{Logger: qmpTestLogger{}} + disconnectedCh := make(chan struct{}) + q, _, err := QMPStartWithConn(context.Background(), nil, cfg, disconnectedCh) + if err == nil { + t.Errorf("Expected error") + q.Shutdown() + } + <-disconnectedCh +} + // Checks that the qmp_capabilities command is correctly sent. // // We start a QMPLoop, send the qmp_capabilities command and stop the From 8a4f08cb0f7c5471cb4fe274eca0d79ba8539c27 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 23 Nov 2022 14:22:28 +0100 Subject: [PATCH 06/20] govmm: Optionally pass QMP listener to QEMU QEMU's -qmp option can be passed the file descriptor of a socket that is already in listening mode. This is done with by passing `fd=XXX` to `-qmp` instead of a path. Note that these two options are mutually exclusive : QEMU errors out if both are passed, so we check that as well in the validation function. While here add the `path=` stanza in the path based case for clarity. Signed-off-by: Greg Kurz --- src/runtime/pkg/govmm/qemu/qemu.go | 14 +++++++++++-- src/runtime/pkg/govmm/qemu/qemu_test.go | 26 ++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index f0beeb9c85..d665c0c7a4 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -2340,6 +2340,9 @@ type QMPSocket struct { // Type is the socket type (e.g. "unix"). Type QMPSocketType + // QMP listener file descriptor to be passed to qemu + FD *os.File + // Name is the socket name. Name string @@ -2352,7 +2355,8 @@ type QMPSocket struct { // Valid returns true if the QMPSocket structure is valid and complete. func (qmp QMPSocket) Valid() bool { - if qmp.Type == "" || qmp.Name == "" { + // Exactly one of Name of FD must be set. + if qmp.Type == "" || (qmp.Name == "") == (qmp.FD == nil) { return false } @@ -2692,7 +2696,13 @@ func (config *Config) appendQMPSockets() { continue } - qmpParams := append([]string{}, fmt.Sprintf("%s:%s", q.Type, q.Name)) + var qmpParams []string + if q.FD != nil { + qemuFDs := config.appendFDs([]*os.File{q.FD}) + qmpParams = append([]string{}, fmt.Sprintf("%s:fd=%d", q.Type, qemuFDs[0])) + } else { + qmpParams = append([]string{}, fmt.Sprintf("%s:path=%s", q.Type, q.Name)) + } if q.Server { qmpParams = append(qmpParams, "server=on") if q.NoWait { diff --git a/src/runtime/pkg/govmm/qemu/qemu_test.go b/src/runtime/pkg/govmm/qemu/qemu_test.go index 01ca765c50..b7428d1019 100644 --- a/src/runtime/pkg/govmm/qemu/qemu_test.go +++ b/src/runtime/pkg/govmm/qemu/qemu_test.go @@ -698,8 +698,8 @@ func TestFailToAppendCPUs(t *testing.T) { } } -var qmpSingleSocketServerString = "-qmp unix:cc-qmp,server=on,wait=off" -var qmpSingleSocketString = "-qmp unix:cc-qmp" +var qmpSingleSocketServerString = "-qmp unix:path=cc-qmp,server=on,wait=off" +var qmpSingleSocketString = "-qmp unix:path=cc-qmp" func TestAppendSingleQMPSocketServer(t *testing.T) { qmp := QMPSocket{ @@ -722,7 +722,27 @@ func TestAppendSingleQMPSocket(t *testing.T) { testAppend(qmp, qmpSingleSocketString, t) } -var qmpSocketServerString = "-qmp unix:cc-qmp-1,server=on,wait=off -qmp unix:cc-qmp-2,server=on,wait=off" +var qmpSocketServerFdString = "-qmp unix:fd=3,server=on,wait=off" + +func TestAppendQMPSocketServerFd(t *testing.T) { + foo, _ := os.CreateTemp(os.TempDir(), "govmm-qemu-test") + + defer func() { + _ = foo.Close() + _ = os.Remove(foo.Name()) + }() + + qmp := QMPSocket{ + Type: "unix", + FD: foo, + Server: true, + NoWait: true, + } + + testAppend(qmp, qmpSocketServerFdString, t) +} + +var qmpSocketServerString = "-qmp unix:path=cc-qmp-1,server=on,wait=off -qmp unix:path=cc-qmp-2,server=on,wait=off" func TestAppendQMPSocketServer(t *testing.T) { qmp := []QMPSocket{ From 8a1723a5cb97aa69230864bd0435f2f21c198170 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 23 Nov 2022 16:02:01 +0100 Subject: [PATCH 07/20] runtime: Pre-establish the QMP connection Running QEMU daemonized ensures that the QMP socket is ready to accept connections when LaunchQemu() returns. In order to be able to run QEMU undaemonized, let's handle that part upfront. Create a listener socket and connect to it. Pass the listener to QEMU and pass the connected socket to QMP : this ensures that we cannot fail to establish QMP connection and that we can detect if QEMU exits before accepting the connection. This is basically what libvirt does. Signed-off-by: Greg Kurz --- src/runtime/virtcontainers/qemu.go | 67 ++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 75a6731dd1..f2d4711371 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -14,6 +14,7 @@ import ( "encoding/json" "fmt" "math" + "net" "os" "os/user" "path/filepath" @@ -368,7 +369,6 @@ func (q *qemu) createQmpSocket() ([]govmmQemu.QMPSocket, error) { return []govmmQemu.QMPSocket{ { Type: "unix", - Name: q.qmpMonitorCh.path, Server: true, NoWait: true, }, @@ -812,6 +812,59 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error { return err } +// setupEarlyQmpConnection creates a listener socket to be passed to QEMU +// as a QMP listening endpoint. An initial connection is established, to +// be used as the QMP client socket. This allows to detect an early failure +// of QEMU instead of looping on connect until some timeout expires. +func (q *qemu) setupEarlyQmpConnection() (net.Conn, error) { + monitorSockPath := q.qmpMonitorCh.path + + qmpListener, err := net.Listen("unix", monitorSockPath) + if err != nil { + q.Logger().WithError(err).Errorf("Unable to listen on unix socket address (%s)", monitorSockPath) + return nil, err + } + + // A duplicate fd of this socket will be passed to QEMU. We must + // close the original one when we're done. + defer qmpListener.Close() + + if rootless.IsRootless() { + err = syscall.Chown(monitorSockPath, int(q.config.Uid), int(q.config.Gid)) + if err != nil { + q.Logger().WithError(err).Errorf("Unable to make unix socket (%s) rootless", monitorSockPath) + return nil, err + } + } + + VMFd, err := qmpListener.(*net.UnixListener).File() + if err != nil { + return nil, err + } + defer func() { + if err != nil { + VMFd.Close() + } + }() + + // This socket will be used to establish the initial QMP connection + dialer := net.Dialer{Cancel: q.qmpMonitorCh.ctx.Done()} + conn, err := dialer.Dial("unix", monitorSockPath) + if err != nil { + q.Logger().WithError(err).Errorf("Unable to connect to unix socket (%s)", monitorSockPath) + return nil, err + } + + // We need to keep the socket file around to be able to re-connect + qmpListener.(*net.UnixListener).SetUnlinkOnClose(false) + + // Pass the duplicated fd of the listener socket to QEMU + q.qemuConfig.QMPSockets[0].FD = VMFd + q.fds = append(q.fds, q.qemuConfig.QMPSockets[0].FD) + + return conn, nil +} + // StartVM will start the Sandbox's VM. func (q *qemu) StartVM(ctx context.Context, timeout int) error { span, ctx := katatrace.Trace(ctx, q.Logger(), "StartVM", qemuTracingTags, map[string]string{"sandbox_id": q.id}) @@ -857,6 +910,12 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { } }() + var qmpConn net.Conn + qmpConn, err = q.setupEarlyQmpConnection() + if err != nil { + return err + } + // This needs to be done as late as possible, just before launching // virtiofsd are executed by kata-runtime after this call, run with // the SELinux label. If these processes require privileged, we do @@ -895,7 +954,7 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { return fmt.Errorf("failed to launch qemu: %s, error messages from qemu log: %s", err, strErr) } - err = q.waitVM(ctx, timeout) + err = q.waitVM(ctx, qmpConn, timeout) if err != nil { return err } @@ -933,7 +992,7 @@ func (q *qemu) bootFromTemplate() error { } // waitVM will wait for the Sandbox's VM to be up and running. -func (q *qemu) waitVM(ctx context.Context, timeout int) error { +func (q *qemu) waitVM(ctx context.Context, qmpConn net.Conn, timeout int) error { span, _ := katatrace.Trace(ctx, q.Logger(), "waitVM", qemuTracingTags, map[string]string{"sandbox_id": q.id}) defer span.End() @@ -953,7 +1012,7 @@ func (q *qemu) waitVM(ctx context.Context, timeout int) error { timeStart := time.Now() for { disconnectCh = make(chan struct{}) - qmp, ver, err = govmmQemu.QMPStart(q.qmpMonitorCh.ctx, q.qmpMonitorCh.path, cfg, disconnectCh) + qmp, ver, err = govmmQemu.QMPStartWithConn(q.qmpMonitorCh.ctx, qmpConn, cfg, disconnectCh) if err == nil { break } From bf4e3a618f3b2aa33ecabc2ca0f6f2d4a527ffe1 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 7 Dec 2022 17:53:20 +0100 Subject: [PATCH 08/20] runtime: Launch QEMU with cmd.Start() LaunchCustomQemu() currently starts QEMU with cmd.Run() which is supposed to block until the child process terminates. This assumes that QEMU daemonizes itself, otherwise LaunchCustomQemu() would block forever. The virtcontainers package indeed enables the Daemonize knob in the configuration but having such an implicit dependency on a supposedly configurable setting is ugly and fragile. cmd.Run() is : func (c *Cmd) Run() error { if err := c.Start(); err != nil { return err } return c.Wait() } Let's open-code this : govmm calls cmd.Start() and returns the cmd to virtcontainers which calls cmd.Wait(). If QEMU doesn't start, e.g. missing binary, there won't be any errors to collect from QEMU output. Just drop these lines in govmm. Similarily there won't be any log file to read from in virtcontainers. Drop that as well. Signed-off-by: Greg Kurz --- src/runtime/pkg/govmm/qemu/examples_test.go | 11 +++++--- src/runtime/pkg/govmm/qemu/qemu.go | 30 +++++++-------------- src/runtime/virtcontainers/qemu.go | 19 +++++++------ 3 files changed, 26 insertions(+), 34 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/examples_test.go b/src/runtime/pkg/govmm/qemu/examples_test.go index 03e52b87ac..1a3f2e8ffb 100644 --- a/src/runtime/pkg/govmm/qemu/examples_test.go +++ b/src/runtime/pkg/govmm/qemu/examples_test.go @@ -27,13 +27,16 @@ func Example() { // resources params = append(params, "-m", "370", "-smp", "cpus=2") - // LaunchCustomQemu should return as soon as the instance has launched as we - // are using the --daemonize flag. It will set up a unix domain socket - // called /tmp/qmp-socket that we can use to manage the instance. - _, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil) + // LaunchCustomQemu should return immediately. We must then wait + // the returned process to terminate as we are using the --daemonize + // flag. + // It will set up a unix domain socket called /tmp/qmp-socket that we + // can use to manage the instance. + proc, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil) if err != nil { panic(err) } + proc.Wait() // This channel will be closed when the instance dies. disconnectedCh := make(chan struct{}) diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index d665c0c7a4..cecd2d2bb7 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -14,7 +14,6 @@ package qemu import ( - "bytes" "context" "fmt" "log" @@ -2985,12 +2984,8 @@ func (config *Config) appendFwCfg(logger QMPLog) { // // The Config parameter contains a set of qemu parameters and settings. // -// This function writes its log output via logger parameter. -// -// The function will block until the launched qemu process exits. "", nil -// will be returned if the launch succeeds. Otherwise a string containing -// the contents of stderr + a Go error object will be returned. -func LaunchQemu(config Config, logger QMPLog) (string, error) { +// See LaunchCustomQemu for more information. +func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, error) { config.appendName() config.appendUUID() config.appendMachine() @@ -3013,7 +3008,7 @@ func LaunchQemu(config Config, logger QMPLog) (string, error) { config.appendSeccompSandbox() if err := config.appendCPUs(); err != nil { - return "", err + return nil, err } ctx := config.Ctx @@ -3044,17 +3039,15 @@ func LaunchQemu(config Config, logger QMPLog) (string, error) { // // This function writes its log output via logger parameter. // -// The function will block until the launched qemu process exits. "", nil -// will be returned if the launch succeeds. Otherwise a string containing -// the contents of stderr + a Go error object will be returned. +// The function returns cmd, nil where cmd is a Go exec.Cmd object +// representing the QEMU process if launched successfully. Otherwise +// nil, err where err is a Go error object is returned. func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []*os.File, - attr *syscall.SysProcAttr, logger QMPLog) (string, error) { + attr *syscall.SysProcAttr, logger QMPLog) (*exec.Cmd, error) { if logger == nil { logger = qmpNullLogger{} } - errStr := "" - if path == "" { path = "qemu-system-x86_64" } @@ -3068,15 +3061,12 @@ func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []* cmd.SysProcAttr = attr - var stderr bytes.Buffer - cmd.Stderr = &stderr logger.Infof("launching %s with: %v", path, params) - err := cmd.Run() + err := cmd.Start() if err != nil { logger.Errorf("Unable to launch %s: %v", path, err) - errStr = stderr.String() - logger.Errorf("%s", errStr) + return nil, err } - return errStr, err + return cmd, nil } diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index f2d4711371..8781747987 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -941,17 +941,16 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { } - var strErr string - strErr, err = govmmQemu.LaunchQemu(q.qemuConfig, newQMPLogger()) + qemuCmd, err := govmmQemu.LaunchQemu(q.qemuConfig, newQMPLogger()) if err != nil { - if q.config.Debug && q.qemuConfig.LogFile != "" { - b, err := os.ReadFile(q.qemuConfig.LogFile) - if err == nil { - strErr += string(b) - } - } - q.Logger().WithError(err).Errorf("failed to launch qemu: %s", strErr) - return fmt.Errorf("failed to launch qemu: %s, error messages from qemu log: %s", err, strErr) + q.Logger().WithError(err).Error("failed to launch qemu") + return fmt.Errorf("failed to launch qemu: %s", err) + } + if q.qemuConfig.Knobs.Daemonize { + // LaunchQemu returns a handle on the upper QEMU process. + // Wait for it to exit to assume that the QEMU daemon was + // actually started. + qemuCmd.Wait() } err = q.waitVM(ctx, qmpConn, timeout) From a5319c6be6caba40b6ca8a958121d0bf566d0605 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 15 Nov 2022 17:13:25 +0100 Subject: [PATCH 09/20] runtime: Start QEMU undaemonized QEMU has always been started daemonized since the beginning. I could not find any justification for that though, but it certainly introduces a problem : QEMU stops logging errors when started this way, which isn't accaptable from a support standpoint. The QEMU community discourages the use of -daemonize ; mostly because libvirt, QEMU's primary consummer, doesn't use this option and prefers getting errors from QEMU's stderr through a pipe in order to enforce rollover. Now that virtcontainers knows how to start QEMU with a pre- established QMP connection, let's start QEMU without -daemonize. This requires to handle the reaping of QEMU when it terminates. Since cmd.Wait() is blocking, call it from a goroutine. Signed-off-by: Greg Kurz --- src/runtime/virtcontainers/qemu.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 8781747987..8bbba51da1 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -531,7 +531,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi NoDefaults: true, NoGraphic: true, NoReboot: true, - Daemonize: true, + Daemonize: false, MemPrealloc: q.config.MemPrealloc, HugePages: q.config.HugePages, IOMMUPlatform: q.config.IOMMUPlatform, @@ -951,6 +951,11 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // Wait for it to exit to assume that the QEMU daemon was // actually started. qemuCmd.Wait() + } else { + // Ensure the QEMU process is reaped after termination + go func() { + qemuCmd.Wait() + }() } err = q.waitVM(ctx, qmpConn, timeout) From 39fe4a4b6f13fde2f8c73d1be38d3d01cfd0e3fc Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 24 Jan 2023 16:12:44 +0100 Subject: [PATCH 10/20] runtime: Collect QEMU's stderr LaunchQemu now connects a pipe to QEMU's stderr and makes it usable by callers through a Go io.ReadCloser object. As explained in [0], all messages should be read from the pipe before calling cmd.Wait : introduce a LogAndWait helper to handle that. Fixes #5780 Signed-off-by: Greg Kurz --- src/runtime/pkg/govmm/qemu/examples_test.go | 2 +- src/runtime/pkg/govmm/qemu/qemu.go | 25 ++++++++++------- src/runtime/virtcontainers/qemu.go | 30 +++++++++++++++++---- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/examples_test.go b/src/runtime/pkg/govmm/qemu/examples_test.go index 1a3f2e8ffb..6f6727effb 100644 --- a/src/runtime/pkg/govmm/qemu/examples_test.go +++ b/src/runtime/pkg/govmm/qemu/examples_test.go @@ -32,7 +32,7 @@ func Example() { // flag. // It will set up a unix domain socket called /tmp/qmp-socket that we // can use to manage the instance. - proc, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil) + proc, _, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil) if err != nil { panic(err) } diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index cecd2d2bb7..4a071019b9 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -16,6 +16,7 @@ package qemu import ( "context" "fmt" + "io" "log" "os" "os/exec" @@ -2985,7 +2986,7 @@ func (config *Config) appendFwCfg(logger QMPLog) { // The Config parameter contains a set of qemu parameters and settings. // // See LaunchCustomQemu for more information. -func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, error) { +func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, io.ReadCloser, error) { config.appendName() config.appendUUID() config.appendMachine() @@ -3008,7 +3009,7 @@ func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, error) { config.appendSeccompSandbox() if err := config.appendCPUs(); err != nil { - return nil, err + return nil, nil, err } ctx := config.Ctx @@ -3039,11 +3040,12 @@ func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, error) { // // This function writes its log output via logger parameter. // -// The function returns cmd, nil where cmd is a Go exec.Cmd object -// representing the QEMU process if launched successfully. Otherwise -// nil, err where err is a Go error object is returned. +// The function returns cmd, reader, nil where cmd is a Go exec.Cmd object +// representing the QEMU process and reader a Go io.ReadCloser object +// connected to QEMU's stderr, if launched successfully. Otherwise +// nil, nil, err where err is a Go error object is returned. func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []*os.File, - attr *syscall.SysProcAttr, logger QMPLog) (*exec.Cmd, error) { + attr *syscall.SysProcAttr, logger QMPLog) (*exec.Cmd, io.ReadCloser, error) { if logger == nil { logger = qmpNullLogger{} } @@ -3061,12 +3063,17 @@ func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []* cmd.SysProcAttr = attr + reader, err := cmd.StderrPipe() + if err != nil { + logger.Errorf("Unable to connect stderr to a pipe") + return nil, nil, err + } logger.Infof("launching %s with: %v", path, params) - err := cmd.Start() + err = cmd.Start() if err != nil { logger.Errorf("Unable to launch %s: %v", path, err) - return nil, err + return nil, nil, err } - return cmd, nil + return cmd, reader, nil } diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 8bbba51da1..54846b40e1 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -13,11 +13,14 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io" "math" "net" "os" + "os/exec" "os/user" "path/filepath" + "regexp" "strconv" "strings" "sync" @@ -865,6 +868,24 @@ func (q *qemu) setupEarlyQmpConnection() (net.Conn, error) { return conn, nil } +func (q *qemu) LogAndWait(qemuCmd *exec.Cmd, reader io.ReadCloser) { + pid := qemuCmd.Process.Pid + q.Logger().Infof("Start logging QEMU (qemuPid=%d)", pid) + scanner := bufio.NewScanner(reader) + warnRE := regexp.MustCompile("(^[^:]+: )warning: ") + for scanner.Scan() { + text := scanner.Text() + if warnRE.MatchString(text) { + text = warnRE.ReplaceAllString(text, "$1") + q.Logger().WithField("qemuPid", pid).Warning(text) + } else { + q.Logger().WithField("qemuPid", pid).Error(text) + } + } + q.Logger().Infof("Stop logging QEMU (qemuPid=%d)", pid) + qemuCmd.Wait() +} + // StartVM will start the Sandbox's VM. func (q *qemu) StartVM(ctx context.Context, timeout int) error { span, ctx := katatrace.Trace(ctx, q.Logger(), "StartVM", qemuTracingTags, map[string]string{"sandbox_id": q.id}) @@ -941,7 +962,7 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { } - qemuCmd, err := govmmQemu.LaunchQemu(q.qemuConfig, newQMPLogger()) + qemuCmd, reader, err := govmmQemu.LaunchQemu(q.qemuConfig, newQMPLogger()) if err != nil { q.Logger().WithError(err).Error("failed to launch qemu") return fmt.Errorf("failed to launch qemu: %s", err) @@ -952,10 +973,9 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // actually started. qemuCmd.Wait() } else { - // Ensure the QEMU process is reaped after termination - go func() { - qemuCmd.Wait() - }() + // Log QEMU errors and ensure the QEMU process is reaped after + // termination. + go q.LogAndWait(qemuCmd, reader) } err = q.waitVM(ctx, qmpConn, timeout) From 2b779cba0050ed40a0dfa3b540e9b3103020feb3 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 25 Jan 2023 15:27:29 +0000 Subject: [PATCH 11/20] docs: Update url link in QAT documentation This PR updates the url link in QAT documentation. Fixes #6130 Signed-off-by: Gabriela Cervantes --- docs/use-cases/using-Intel-QAT-and-kata.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use-cases/using-Intel-QAT-and-kata.md b/docs/use-cases/using-Intel-QAT-and-kata.md index d029de3672..e83569fbed 100644 --- a/docs/use-cases/using-Intel-QAT-and-kata.md +++ b/docs/use-cases/using-Intel-QAT-and-kata.md @@ -49,7 +49,7 @@ the latest driver. $ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz $ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER} $ export QAT_CONF_LOCATION=~/QAT_conf -$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile +$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile $ export QAT_SRC=~/src/QAT $ export GOPATH=~/src/go $ export KATA_KERNEL_LOCATION=~/kata From 00dcd900f9b9164029cd7022099acde032bb5257 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Thu, 26 Jan 2023 10:50:05 -0800 Subject: [PATCH 12/20] docs: Add documentation for building agent with seccomp support. The default for the agent today is building with seccomp support. However, additional steps need to be taken for building against musl such as installing the static seccomp library for musl. Add documentation to explain this. Fixes #6136 Signed-off-by: Archana Shinde --- docs/Developer-Guide.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/docs/Developer-Guide.md b/docs/Developer-Guide.md index 1af800d5b9..5d0fdf6c52 100644 --- a/docs/Developer-Guide.md +++ b/docs/Developer-Guide.md @@ -232,10 +232,6 @@ $ rustup target add "${ARCH}-unknown-linux-${LIBC}" To build the agent: -```bash -$ make -C kata-containers/src/agent -``` - The agent is built with seccomp capability by default. If you want to build the agent without the seccomp capability, you need to run `make` with `SECCOMP=no` as follows. @@ -243,6 +239,31 @@ If you want to build the agent without the seccomp capability, you need to run ` $ make -C kata-containers/src/agent SECCOMP=no ``` +For building the agent with seccomp support using `musl`, set the environment +variables for the [`libseccomp` crate](https://github.com/libseccomp-rs/libseccomp-rs). + +```bash +$ export LIBSECCOMP_LINK_TYPE=static +$ export LIBSECCOMP_LIB_PATH="the path of the directory containing libseccomp.a" +$ make -C kata-containers/src/agent +``` + +If the compilation fails when the agent tries to link the `libseccomp` library statically +against `musl`, you will need to build `libseccomp` manually with `-U_FORTIFY_SOURCE`. +You can use [our script](https://github.com/kata-containers/kata-containers/blob/main/ci/install_libseccomp.sh) +to install `libseccomp` for the agent. + +```bash +$ mkdir -p ${seccomp_install_path} ${gperf_install_path} +$ kata-containers/ci/install_libseccomp.sh ${seccomp_install_path} ${gperf_install_path} +$ export LIBSECCOMP_LIB_PATH="${seccomp_install_path}/lib" +``` + +On `ppc64le` and `s390x`, `glibc` is used. You will need to install the `libseccomp` library +provided by your distribution. + +> e.g. `libseccomp-dev` for Ubuntu, or `libseccomp-devel` for CentOS + > **Note:** > > - If you enable seccomp in the main configuration file but build the agent without seccomp capability, From 0b3c91d2a23f31aabec923d2c5cd24d4e1d1e08c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 2 Jan 2023 09:52:41 +0100 Subject: [PATCH 13/20] kata-deploy: Add kernel-dragonball-experimental target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Chao Wu added the support for building the dragonball kernel as a new experimental kernel, let's make sure we reflect that as part of the kata-deploy build scripts. Signed-off-by: Fabiano Fidêncio --- tools/packaging/kata-deploy/local-build/Makefile | 4 ++++ .../kata-deploy/local-build/kata-deploy-binaries.sh | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 078a679d85..de831e1fe7 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -24,6 +24,7 @@ all-parallel: $(MK_DIR)/dockerbuild/install_yq.sh all: serial-targets \ firecracker-tarball \ kernel-tarball \ + kernel-dragonball-experimental-tarball \ nydus-tarball \ qemu-tarball \ shim-v2-tarball \ @@ -47,6 +48,9 @@ firecracker-tarball: kernel-tarball: ${MAKE} $@-build +kernel-dragonball-experimental-tarball: + ${MAKE} $@-build + kernel-experimental-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 1e83e31537..a7143096c4 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -73,6 +73,7 @@ options: cloud-hypervisor firecracker kernel + kernel-dragonball-experimental kernel-experimental nydus qemu @@ -103,6 +104,13 @@ install_kernel() { DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -f -v "${kernel_version}" } +#Install dragonball experimental kernel asset +install_dragonball_experimental_kernel() { + info "build dragonball experimental kernel" + export kernel_version="$(yq r $versions_yaml assets.dragonball-kernel-experimental.version)" + info "kernel version ${kernel_version}" + DESTDIR="${destdir}" PREFIX="${prefix}" "${kernel_builder}" -e -t dragonball -v ${kernel_version} +} #Install experimental kernel asset install_experimental_kernel() { @@ -204,6 +212,8 @@ handle_build() { nydus) install_nydus ;; + kernel-dragonball-experimental) install_dragonball_experimental_kernel;; + kernel-experimental) install_experimental_kernel;; qemu) install_qemu ;; From 063dec37c298ad16ee22c772a23020bef424f879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 2 Jan 2023 09:54:45 +0100 Subject: [PATCH 14/20] release: Add the dragonball-experimental kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add the dragonball specific kernel, which takes advantage of upcall, as part of the release tarball, so it can be used from the release tarball / kata-deploy. Signed-off-by: Fabiano Fidêncio --- .github/workflows/release.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b5f585937e..bbb95c53da 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,6 +13,7 @@ jobs: - cloud-hypervisor - firecracker - kernel + - kernel-dragonball-experimental - nydus - qemu - rootfs-image From b7f4e96ff39941dd97130e1b6f0166b830e8f655 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 2 Jan 2023 09:56:19 +0100 Subject: [PATCH 15/20] kata-deploy-test: Ensure we build dragonball specific kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the dragonball specific kernel is now part of the release, let's make sure we build it as part of the kata-deploy-test action. Fixes: #5859 Signed-off-by: Fabiano Fidêncio --- .github/workflows/kata-deploy-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/kata-deploy-test.yaml b/.github/workflows/kata-deploy-test.yaml index 7f6839b40f..5a924c7390 100644 --- a/.github/workflows/kata-deploy-test.yaml +++ b/.github/workflows/kata-deploy-test.yaml @@ -50,6 +50,7 @@ jobs: - cloud-hypervisor - firecracker - kernel + - kernel-dragonball-experimental - nydus - qemu - rootfs-image From 9092c23a2efaa635903c2161e02b61f05242fc76 Mon Sep 17 00:00:00 2001 From: zhaojizhuang <571130360@qq.com> Date: Sat, 28 Jan 2023 10:51:52 +0800 Subject: [PATCH 16/20] runtime: Add hmp for qemu Fixes: #6092 Signed-off-by: zhaojizhuang <571130360@qq.com> --- src/runtime/config/configuration-qemu.toml.in | 2 +- src/runtime/pkg/govmm/qemu/qemu.go | 11 ++++++- src/runtime/virtcontainers/hypervisor.go | 2 +- src/runtime/virtcontainers/qemu.go | 30 ++++++++++++++++--- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 0394d01246..4aced59752 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -311,7 +311,7 @@ valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ pflashes = [] # This option changes the default hypervisor and kernel parameters -# to enable debug output where available. +# to enable debug output where available. And Debug also enable the hmp socket. # # Default false #enable_debug = true diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 4a071019b9..148212f511 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -2336,10 +2336,14 @@ const ( ) // QMPSocket represents a qemu QMP socket configuration. +// nolint: govet type QMPSocket struct { // Type is the socket type (e.g. "unix"). Type QMPSocketType + // Human Monitor Interface (HMP) (true for HMP, false for QMP, default false) + IsHmp bool + // QMP listener file descriptor to be passed to qemu FD *os.File @@ -2710,7 +2714,12 @@ func (config *Config) appendQMPSockets() { } } - config.qemuParams = append(config.qemuParams, "-qmp") + if q.IsHmp { + config.qemuParams = append(config.qemuParams, "-monitor") + } else { + config.qemuParams = append(config.qemuParams, "-qmp") + } + config.qemuParams = append(config.qemuParams, strings.Join(qmpParams, ",")) } } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index bb1b8e90e7..3ea7a83a47 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -505,7 +505,7 @@ type HypervisorConfig struct { EnableIOThreads bool // Debug changes the default hypervisor and kernel parameters to - // enable debug output where available. + // enable debug output where available. And Debug also enable the hmp socket. Debug bool // MemPrealloc specifies if the memory should be pre-allocated diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 54846b40e1..d59d67d801 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -121,6 +121,7 @@ type qemu struct { const ( consoleSocket = "console.sock" qmpSocket = "qmp.sock" + hmpSocket = "hmp.sock" vhostFSSocket = "vhost-fs.sock" nydusdAPISock = "nydusd-api.sock" @@ -328,6 +329,10 @@ func (q *qemu) qmpSocketPath(id string) (string, error) { return utils.BuildSocketPath(q.config.VMStorePath, id, qmpSocket) } +func (q *qemu) hmpSocketPath(id string) (string, error) { + return utils.BuildSocketPath(q.config.VMStorePath, id, hmpSocket) +} + func (q *qemu) getQemuMachine() (govmmQemu.Machine, error) { machine := q.arch.machine() @@ -369,13 +374,30 @@ func (q *qemu) createQmpSocket() ([]govmmQemu.QMPSocket, error) { path: monitorSockPath, } - return []govmmQemu.QMPSocket{ - { + var sockets []govmmQemu.QMPSocket + + sockets = append(sockets, govmmQemu.QMPSocket{ + Type: "unix", + Server: true, + NoWait: true, + }) + + if q.HypervisorConfig().Debug { + humanMonitorSockPath, err := q.hmpSocketPath(q.id) + if err != nil { + return nil, err + } + + sockets = append(sockets, govmmQemu.QMPSocket{ Type: "unix", + IsHmp: true, + Name: humanMonitorSockPath, Server: true, NoWait: true, - }, - }, nil + }) + } + + return sockets, nil } func (q *qemu) buildDevices(ctx context.Context, initrdPath string) ([]govmmQemu.Device, *govmmQemu.IOThread, error) { From 8e8c720d51858b063e2b9c3e2aaa050f2f264e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 30 Jan 2023 09:39:24 +0100 Subject: [PATCH 17/20] kata-deploy-push: Ensure we build Dragonball specific kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the dragonball specific kernel is now part of the release, let's make sure we build it as part of the kata-deploy-push action. Fixes: #5859 Signed-off-by: Fabiano Fidêncio --- .github/workflows/kata-deploy-push.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/kata-deploy-push.yaml b/.github/workflows/kata-deploy-push.yaml index bdd0aeb2bd..c7d7e8cb4e 100644 --- a/.github/workflows/kata-deploy-push.yaml +++ b/.github/workflows/kata-deploy-push.yaml @@ -18,6 +18,7 @@ jobs: matrix: asset: - kernel + - kernel-dragonball-experimental - shim-v2 - qemu - cloud-hypervisor From dc90c6e30b7fbf729abda9c90a17671a2741d083 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Wed, 18 May 2022 00:22:22 +0800 Subject: [PATCH 18/20] dragonball: add more unit test for vm Added more unit tests for vm module. Fixes: #4899 Signed-off-by: wllenyj --- src/dragonball/src/address_space_manager.rs | 5 + src/dragonball/src/vm/mod.rs | 205 ++++++++++++++++++++ 2 files changed, 210 insertions(+) diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs index 0e9ac91d73..6efdb027ad 100644 --- a/src/dragonball/src/address_space_manager.rs +++ b/src/dragonball/src/address_space_manager.rs @@ -250,6 +250,11 @@ impl AddressSpaceMgr { self.address_space.as_ref() } + /// Get the guest memory. + pub fn vm_memory(&self) -> Option<::T> { + self.get_vm_as().map(|m| m.memory()) + } + /// Create the address space for a virtual machine. /// /// This method is designed to be called when starting up a virtual machine instead of at diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index d573080ae8..b9e0dc85cc 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -825,7 +825,14 @@ impl Vm { #[cfg(test)] pub mod tests { + use kvm_ioctls::VcpuExit; + use linux_loader::cmdline::Cmdline; + use test_utils::skip_if_not_root; + use vm_memory::GuestMemory; + use vmm_sys_util::tempfile::TempFile; + use super::*; + use crate::test_utils::tests::create_vm_for_test; impl Vm { pub fn set_instance_state(&mut self, mstate: InstanceState) { @@ -841,4 +848,202 @@ pub mod tests { let epoll_manager = EpollManager::default(); Vm::new(None, instance_info, epoll_manager).unwrap() } + + #[test] + fn test_create_vm_instance() { + skip_if_not_root!(); + let vm = create_vm_instance(); + assert!(vm.check_health().is_err()); + assert!(vm.kernel_config.is_none()); + assert!(vm.get_reset_eventfd().is_none()); + assert!(!vm.is_vm_initialized()); + assert!(!vm.is_vm_running()); + assert!(vm.reset_console().is_ok()); + } + + #[test] + fn test_vm_init_guest_memory() { + skip_if_not_root!(); + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + + let mut vm = create_vm_instance(); + vm.set_vm_config(vm_config); + assert!(vm.init_guest_memory().is_ok()); + let vm_memory = vm.address_space.vm_memory().unwrap(); + + assert_eq!(vm_memory.num_regions(), 1); + assert_eq!(vm_memory.last_addr(), GuestAddress(0xffffff)); + + // Reconfigure an already configured vm will be ignored and just return OK. + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + vm.set_vm_config(vm_config); + assert!(vm.init_guest_memory().is_ok()); + let vm_memory = vm.address_space.vm_memory().unwrap(); + assert_eq!(vm_memory.num_regions(), 1); + assert_eq!(vm_memory.last_addr(), GuestAddress(0xffffff)); + + let obj_addr = GuestAddress(0xf0); + vm_memory.write_obj(67u8, obj_addr).unwrap(); + let read_val: u8 = vm_memory.read_obj(obj_addr).unwrap(); + assert_eq!(read_val, 67u8); + } + + #[test] + fn test_vm_create_devices() { + skip_if_not_root!(); + let vmm = Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance())); + let epoll_mgr = EpollManager::default(); + + let mut guard = vmm.lock().unwrap(); + let vm = guard.get_vm_mut().unwrap(); + + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + + vm.set_vm_config(vm_config); + assert!(vm.init_guest_memory().is_ok()); + assert!(vm.setup_interrupt_controller().is_ok()); + + let vm_memory = vm.address_space.vm_memory().unwrap(); + assert_eq!(vm_memory.num_regions(), 1); + assert_eq!(vm_memory.last_addr(), GuestAddress(0xffffff)); + + let kernel_file = TempFile::new().unwrap(); + let cmd_line = Cmdline::new(64); + + vm.set_kernel_config(KernelConfigInfo::new( + kernel_file.into_file(), + None, + cmd_line, + )); + + vm.init_devices(epoll_mgr).unwrap(); + } + + #[test] + fn test_vm_delete_devices() { + skip_if_not_root!(); + let mut vm = create_vm_for_test(); + let epoll_mgr = EpollManager::default(); + + vm.setup_interrupt_controller().unwrap(); + vm.init_devices(epoll_mgr).unwrap(); + assert!(vm.remove_devices().is_ok()); + } + + #[test] + fn test_run_code() { + skip_if_not_root!(); + + use std::io::{self, Write}; + // This example is based on https://lwn.net/Articles/658511/ + let code = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, (%dx) */ + 0xb0, b'\n', /* mov $'\n', %al */ + 0xee, /* out %al, (%dx) */ + 0xf4, /* hlt */ + ]; + let load_addr = GuestAddress(0x1000); + let instance_info = Arc::new(RwLock::new(InstanceInfo::default())); + let epoll_manager = EpollManager::default(); + let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap(); + + let vcpu_count = 1; + let vm_config = VmConfigInfo { + vcpu_count, + max_vcpu_count: 1, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 10, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + + vm.set_vm_config(vm_config); + vm.init_guest_memory().unwrap(); + + let vm_memory = vm.address_space.vm_memory().unwrap(); + vm_memory.write_obj(code, load_addr).unwrap(); + + let vcpu_fd = vm.vm_fd().create_vcpu(0).unwrap(); + let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + assert_ne!(vcpu_sregs.cs.base, 0); + assert_ne!(vcpu_sregs.cs.selector, 0); + vcpu_sregs.cs.base = 0; + vcpu_sregs.cs.selector = 0; + vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + + let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + + vcpu_regs.rip = 0x1000; + vcpu_regs.rax = 2; + vcpu_regs.rbx = 3; + vcpu_regs.rflags = 2; + vcpu_fd.set_regs(&vcpu_regs).unwrap(); + + match vcpu_fd.run().expect("run failed") { + VcpuExit::IoOut(0x3f8, data) => { + assert_eq!(data.len(), 1); + io::stdout().write_all(data).unwrap(); + } + VcpuExit::Hlt => { + io::stdout().write_all(b"KVM_EXIT_HLT\n").unwrap(); + } + r => panic!("unexpected exit reason: {:?}", r), + } + } } From 510798155de2655f0ed2c1d2428c1f949373db68 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Wed, 9 Nov 2022 11:19:19 +0800 Subject: [PATCH 19/20] dragonball: Improve test cases The same EpollManager should be used instead of creating two. Signed-off-by: wllenyj --- src/dragonball/src/api/v1/vmm_action.rs | 12 ++++++------ src/dragonball/src/device_manager/mod.rs | 4 +++- src/dragonball/src/vm/mod.rs | 4 +++- src/dragonball/src/vmm.rs | 5 ++--- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 45591e7ed1..a2dd496fda 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -656,10 +656,10 @@ mod tests { let (to_vmm, from_api) = channel(); let (to_api, from_vmm) = channel(); - let vmm = Arc::new(Mutex::new(create_vmm_instance())); + let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone()))); let mut vservice = VmmService::new(from_api, to_api); - let epoll_mgr = EpollManager::default(); let mut event_mgr = EventManager::new(&vmm, epoll_mgr).unwrap(); let mut v = vmm.lock().unwrap(); @@ -681,9 +681,9 @@ mod tests { let (_to_vmm, from_api) = channel(); let (to_api, _from_vmm) = channel(); - let vmm = Arc::new(Mutex::new(create_vmm_instance())); - let mut vservice = VmmService::new(from_api, to_api); let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone()))); + let mut vservice = VmmService::new(from_api, to_api); let mut event_mgr = EventManager::new(&vmm, epoll_mgr).unwrap(); let mut v = vmm.lock().unwrap(); @@ -695,9 +695,9 @@ mod tests { fn test_vmm_action_disconnected() { let (to_vmm, from_api) = channel(); let (to_api, _from_vmm) = channel(); - let vmm = Arc::new(Mutex::new(create_vmm_instance())); - let mut vservice = VmmService::new(from_api, to_api); let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone()))); + let mut vservice = VmmService::new(from_api, to_api); let mut event_mgr = EventManager::new(&vmm, epoll_mgr).unwrap(); let mut v = vmm.lock().unwrap(); diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 06f6ea6ab2..56ee3617ca 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -1065,7 +1065,9 @@ mod tests { use crate::vm::VmConfigInfo; let epoll_manager = EpollManager::default(); - let vmm = Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance())); + let vmm = Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance( + epoll_manager.clone(), + ))); let event_mgr = crate::event_manager::EventManager::new(&vmm, epoll_manager).unwrap(); let mut vm = crate::vm::tests::create_vm_instance(); let vm_config = VmConfigInfo { diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index b9e0dc85cc..8ad959c791 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -921,8 +921,10 @@ pub mod tests { #[test] fn test_vm_create_devices() { skip_if_not_root!(); - let vmm = Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance())); let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance( + epoll_mgr.clone(), + ))); let mut guard = vmm.lock().unwrap(); let vm = guard.get_vm_mut().unwrap(); diff --git a/src/dragonball/src/vmm.rs b/src/dragonball/src/vmm.rs index b15e66fef1..72c799e111 100644 --- a/src/dragonball/src/vmm.rs +++ b/src/dragonball/src/vmm.rs @@ -200,11 +200,10 @@ pub(crate) mod tests { use super::*; - pub fn create_vmm_instance() -> Vmm { + pub fn create_vmm_instance(epoll_manager: EpollManager) -> Vmm { let info = Arc::new(RwLock::new(InstanceInfo::default())); let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let seccomp_filter: BpfProgram = Vec::new(); - let epoll_manager = EpollManager::default(); Vmm::new_with_epoll_manager( info, @@ -221,6 +220,6 @@ pub(crate) mod tests { fn test_create_vmm_instance() { skip_if_not_root!(); - create_vmm_instance(); + create_vmm_instance(EpollManager::default()); } } From 334c4b8bdcb82597a8346489291defbaf254a159 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 15 Dec 2022 09:14:14 +0100 Subject: [PATCH 20/20] runtime: Drop QEMU log file support The QEMU log file is essentially about fine grain tracing of QEMU internals and mostly useful for developpers, not production. Notably, the log file isn't limited in size, nor rotated in any way. It means that a container running in the VM could possibly flood the log file with a guest triggerable trace. For example, on openshift, the log file is supposed to reside on a per-VM 14 GiB tmpfs mount. This means that each pod running with the kata runtime could potentially consume this amount of host RAM which is not acceptable. Error messages are best collected from QEMU's stderr as kata is doing now since PR #5736 was merged. Drop support for the QEMU log file because it doesn't bring any value but can certainly do harm. Fixes #6173 Signed-off-by: Greg Kurz --- src/runtime/pkg/govmm/qemu/qemu.go | 11 ----------- src/runtime/pkg/govmm/qemu/qemu_test.go | 5 +---- src/runtime/virtcontainers/qemu.go | 17 ----------------- 3 files changed, 1 insertion(+), 32 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 148212f511..6932826d7d 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -2628,9 +2628,6 @@ type Config struct { // PidFile is the -pidfile parameter PidFile string - // LogFile is the -D parameter - LogFile string - qemuParams []string } @@ -2968,13 +2965,6 @@ func (config *Config) appendPidFile() { } } -func (config *Config) appendLogFile() { - if config.LogFile != "" { - config.qemuParams = append(config.qemuParams, "-D") - config.qemuParams = append(config.qemuParams, config.LogFile) - } -} - func (config *Config) appendFwCfg(logger QMPLog) { if logger == nil { logger = qmpNullLogger{} @@ -3013,7 +3003,6 @@ func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, io.ReadCloser, error) config.appendIOThreads() config.appendIncoming() config.appendPidFile() - config.appendLogFile() config.appendFwCfg(logger) config.appendSeccompSandbox() diff --git a/src/runtime/pkg/govmm/qemu/qemu_test.go b/src/runtime/pkg/govmm/qemu/qemu_test.go index b7428d1019..34ca776dfc 100644 --- a/src/runtime/pkg/govmm/qemu/qemu_test.go +++ b/src/runtime/pkg/govmm/qemu/qemu_test.go @@ -764,8 +764,7 @@ func TestAppendQMPSocketServer(t *testing.T) { } var pidfile = "/run/vc/vm/iamsandboxid/pidfile" -var logfile = "/run/vc/vm/iamsandboxid/logfile" -var qemuString = "-name cc-qemu -cpu host -uuid " + agentUUID + " -pidfile " + pidfile + " -D " + logfile +var qemuString = "-name cc-qemu -cpu host -uuid " + agentUUID + " -pidfile " + pidfile func TestAppendStrings(t *testing.T) { config := Config{ @@ -774,14 +773,12 @@ func TestAppendStrings(t *testing.T) { UUID: agentUUID, CPUModel: "host", PidFile: pidfile, - LogFile: logfile, } config.appendName() config.appendCPUModel() config.appendUUID() config.appendPidFile() - config.appendLogFile() result := strings.Join(config.qemuParams, " ") if result != qemuString { diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index d59d67d801..3e1fd4bfda 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -940,10 +940,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { return err } q.Logger().WithField("vm path", vmPath).Info("created vm path") - // append logfile only on debug - if q.config.Debug { - q.qemuConfig.LogFile = filepath.Join(vmPath, "qemu.log") - } defer func() { if err != nil { @@ -1108,19 +1104,6 @@ func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) { } }() - if q.config.Debug && q.qemuConfig.LogFile != "" { - f, err := os.OpenFile(q.qemuConfig.LogFile, os.O_RDONLY, 0) - if err == nil { - scanner := bufio.NewScanner(f) - for scanner.Scan() { - q.Logger().WithField("file", q.qemuConfig.LogFile).Debug(scanner.Text()) - } - if err := scanner.Err(); err != nil { - q.Logger().WithError(err).Debug("read qemu log failed") - } - } - } - if err := q.qmpSetup(); err != nil { return err }