mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-27 07:48:55 +00:00
versions: update QEMU to 5.2.0
This changes the version of QEMU used in the tests and CI. The scripts/configure-hypervisor.sh script was changed so that it: - Passes the `--enable-virtiofsd` flag - Does not compile with -O3, to avoid the warning: Program python3 found: YES (/usr/bin/python3) ../meson.build:104: WARNING: Consider using the built-in optimization level instead of using "-O3". ../meson.build:108: WARNING: Consider using the built-in optimization level instead of using "-O3". The qemu.blacklist file was changed so that new and unneeded firmware files are removed from the final tarball, except for qboot.rom, which is new but kept since it can be used with the microvm machine type (in case we want to enable microvm in the future). The patches which are applied on the QEMU sources: - 0001-virtiofsd-Allow-to-build-it-without-the-tools.patch (Build fix for Meson - allows passing `--disable-tools --enable-virtiofsd`) - 0002-virtiofsd-extract-lo_do_open-from-lo_open.patch 0003-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch 0004-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch 0005-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch 0006-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch (Security fixes for virtiofsd) - 0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch (Performance improvement for 9p driver) - 0008-hw-s390x-fix-build-for-virtio-9p-ccw.patch (Build fix for virtio-9p-ccw machine type) Fixes: #1238 Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
This commit is contained in:
parent
74a893f732
commit
88cef33b76
@ -0,0 +1,36 @@
|
||||
From a369195f7fb111a55617ff14aac4c8d87f3b996f Mon Sep 17 00:00:00 2001
|
||||
From: Wainer dos Santos Moschetta <wainersm@redhat.com>
|
||||
Date: Tue, 2 Feb 2021 13:46:24 -0500
|
||||
Subject: [PATCH] virtiofsd: Allow to build it without the tools
|
||||
|
||||
This changes the Meson build script to allow virtiofsd to be built even
|
||||
though the tools build is disabled, thus honoring the --enable-virtiofsd
|
||||
option.
|
||||
|
||||
(Backport of commit xxxxxx)
|
||||
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
|
||||
---
|
||||
tools/meson.build | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/tools/meson.build b/tools/meson.build
|
||||
index 76bf84df52..2a1d520127 100644
|
||||
--- a/tools/meson.build
|
||||
+++ b/tools/meson.build
|
||||
@@ -10,8 +10,11 @@ if get_option('virtiofsd').enabled()
|
||||
error('virtiofsd requires Linux')
|
||||
elif 'CONFIG_SECCOMP' not in config_host or 'CONFIG_LIBCAP_NG' not in config_host
|
||||
error('virtiofsd requires libcap-ng-devel and seccomp-devel')
|
||||
- elif not have_tools or 'CONFIG_VHOST_USER' not in config_host
|
||||
- error('virtiofsd needs tools and vhost-user support')
|
||||
+ elif 'CONFIG_VHOST_USER' not in config_host
|
||||
+ error('virtiofsd needs vhost-user support')
|
||||
+ else
|
||||
+ # Disabled all the tools but virtiofsd.
|
||||
+ have_virtiofsd = true
|
||||
endif
|
||||
endif
|
||||
elif get_option('virtiofsd').disabled() or not have_system
|
||||
--
|
||||
2.26.2
|
||||
|
@ -0,0 +1,145 @@
|
||||
From 8afaaee976965b7fb90ec225a51d60f35c5f173c Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 4 Feb 2021 15:02:06 +0000
|
||||
Subject: [PATCH 2/6] virtiofsd: extract lo_do_open() from lo_open()
|
||||
|
||||
Both lo_open() and lo_create() have similar code to open a file. Extract
|
||||
a common lo_do_open() function from lo_open() that will be used by
|
||||
lo_create() in a later commit.
|
||||
|
||||
Since lo_do_open() does not otherwise need fuse_req_t req, convert
|
||||
lo_add_fd_mapping() to use struct lo_data *lo instead.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20210204150208.367837-2-stefanha@redhat.com>
|
||||
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------
|
||||
1 file changed, 46 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||
index 5fb36d9407..f14fa5124d 100644
|
||||
--- a/tools/virtiofsd/passthrough_ll.c
|
||||
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||
@@ -459,17 +459,17 @@ static void lo_map_remove(struct lo_map *map, size_t key)
|
||||
}
|
||||
|
||||
/* Assumes lo->mutex is held */
|
||||
-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
|
||||
+static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd)
|
||||
{
|
||||
struct lo_map_elem *elem;
|
||||
|
||||
- elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
|
||||
+ elem = lo_map_alloc_elem(&lo->fd_map);
|
||||
if (!elem) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
elem->fd = fd;
|
||||
- return elem - lo_data(req)->fd_map.elems;
|
||||
+ return elem - lo->fd_map.elems;
|
||||
}
|
||||
|
||||
/* Assumes lo->mutex is held */
|
||||
@@ -1651,6 +1651,38 @@ static void update_open_flags(int writeback, int allow_direct_io,
|
||||
}
|
||||
}
|
||||
|
||||
+static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
+ struct fuse_file_info *fi)
|
||||
+{
|
||||
+ char buf[64];
|
||||
+ ssize_t fh;
|
||||
+ int fd;
|
||||
+
|
||||
+ update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
+
|
||||
+ sprintf(buf, "%i", inode->fd);
|
||||
+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
|
||||
+ if (fd == -1) {
|
||||
+ return errno;
|
||||
+ }
|
||||
+
|
||||
+ pthread_mutex_lock(&lo->mutex);
|
||||
+ fh = lo_add_fd_mapping(lo, fd);
|
||||
+ pthread_mutex_unlock(&lo->mutex);
|
||||
+ if (fh == -1) {
|
||||
+ close(fd);
|
||||
+ return ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ fi->fh = fh;
|
||||
+ if (lo->cache == CACHE_NONE) {
|
||||
+ fi->direct_io = 1;
|
||||
+ } else if (lo->cache == CACHE_ALWAYS) {
|
||||
+ fi->keep_cache = 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
mode_t mode, struct fuse_file_info *fi)
|
||||
{
|
||||
@@ -1691,7 +1723,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
ssize_t fh;
|
||||
|
||||
pthread_mutex_lock(&lo->mutex);
|
||||
- fh = lo_add_fd_mapping(req, fd);
|
||||
+ fh = lo_add_fd_mapping(lo, fd);
|
||||
pthread_mutex_unlock(&lo->mutex);
|
||||
if (fh == -1) {
|
||||
close(fd);
|
||||
@@ -1892,38 +1924,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
|
||||
|
||||
static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||||
{
|
||||
- int fd;
|
||||
- ssize_t fh;
|
||||
- char buf[64];
|
||||
struct lo_data *lo = lo_data(req);
|
||||
+ struct lo_inode *inode = lo_inode(req, ino);
|
||||
+ int err;
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
|
||||
fi->flags);
|
||||
|
||||
- update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
-
|
||||
- sprintf(buf, "%i", lo_fd(req, ino));
|
||||
- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
|
||||
- if (fd == -1) {
|
||||
- return (void)fuse_reply_err(req, errno);
|
||||
- }
|
||||
-
|
||||
- pthread_mutex_lock(&lo->mutex);
|
||||
- fh = lo_add_fd_mapping(req, fd);
|
||||
- pthread_mutex_unlock(&lo->mutex);
|
||||
- if (fh == -1) {
|
||||
- close(fd);
|
||||
- fuse_reply_err(req, ENOMEM);
|
||||
+ if (!inode) {
|
||||
+ fuse_reply_err(req, EBADF);
|
||||
return;
|
||||
}
|
||||
|
||||
- fi->fh = fh;
|
||||
- if (lo->cache == CACHE_NONE) {
|
||||
- fi->direct_io = 1;
|
||||
- } else if (lo->cache == CACHE_ALWAYS) {
|
||||
- fi->keep_cache = 1;
|
||||
+ err = lo_do_open(lo, inode, fi);
|
||||
+ lo_inode_put(lo, &inode);
|
||||
+ if (err) {
|
||||
+ fuse_reply_err(req, err);
|
||||
+ } else {
|
||||
+ fuse_reply_open(req, fi);
|
||||
}
|
||||
- fuse_reply_open(req, fi);
|
||||
}
|
||||
|
||||
static void lo_release(fuse_req_t req, fuse_ino_t ino,
|
||||
--
|
||||
2.26.2
|
||||
|
@ -0,0 +1,109 @@
|
||||
From 22d2ece71e533310da31f2857ebc4a00d91968b3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 4 Feb 2021 15:02:07 +0000
|
||||
Subject: [PATCH 3/6] virtiofsd: optionally return inode pointer from
|
||||
lo_do_lookup()
|
||||
|
||||
lo_do_lookup() finds an existing inode or allocates a new one. It
|
||||
increments nlookup so that the inode stays alive until the client
|
||||
releases it.
|
||||
|
||||
Existing callers don't need the struct lo_inode so the function doesn't
|
||||
return it. Extend the function to optionally return the inode. The next
|
||||
commit will need it.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20210204150208.367837-3-stefanha@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++--------
|
||||
1 file changed, 21 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||
index f14fa5124d..aa35fc6ba5 100644
|
||||
--- a/tools/virtiofsd/passthrough_ll.c
|
||||
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||
@@ -831,11 +831,13 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Increments nlookup and caller must release refcount using
|
||||
- * lo_inode_put(&parent).
|
||||
+ * Increments nlookup on the inode on success. unref_inode_lolocked() must be
|
||||
+ * called eventually to decrement nlookup again. If inodep is non-NULL, the
|
||||
+ * inode pointer is stored and the caller must call lo_inode_put().
|
||||
*/
|
||||
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
- struct fuse_entry_param *e)
|
||||
+ struct fuse_entry_param *e,
|
||||
+ struct lo_inode **inodep)
|
||||
{
|
||||
int newfd;
|
||||
int res;
|
||||
@@ -845,6 +847,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
struct lo_inode *inode = NULL;
|
||||
struct lo_inode *dir = lo_inode(req, parent);
|
||||
|
||||
+ if (inodep) {
|
||||
+ *inodep = NULL;
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* name_to_handle_at() and open_by_handle_at() can reach here with fuse
|
||||
* mount point in guest, but we don't have its inode info in the
|
||||
@@ -913,7 +919,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
pthread_mutex_unlock(&lo->mutex);
|
||||
}
|
||||
e->ino = inode->fuse_ino;
|
||||
- lo_inode_put(lo, &inode);
|
||||
+
|
||||
+ /* Transfer ownership of inode pointer to caller or drop it */
|
||||
+ if (inodep) {
|
||||
+ *inodep = inode;
|
||||
+ } else {
|
||||
+ lo_inode_put(lo, &inode);
|
||||
+ }
|
||||
+
|
||||
lo_inode_put(lo, &dir);
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
|
||||
@@ -948,7 +961,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
|
||||
return;
|
||||
}
|
||||
|
||||
- err = lo_do_lookup(req, parent, name, &e);
|
||||
+ err = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
if (err) {
|
||||
fuse_reply_err(req, err);
|
||||
} else {
|
||||
@@ -1056,7 +1069,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
|
||||
goto out;
|
||||
}
|
||||
|
||||
- saverr = lo_do_lookup(req, parent, name, &e);
|
||||
+ saverr = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
if (saverr) {
|
||||
goto out;
|
||||
}
|
||||
@@ -1534,7 +1547,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
|
||||
|
||||
if (plus) {
|
||||
if (!is_dot_or_dotdot(name)) {
|
||||
- err = lo_do_lookup(req, ino, name, &e);
|
||||
+ err = lo_do_lookup(req, ino, name, &e, NULL);
|
||||
if (err) {
|
||||
goto error;
|
||||
}
|
||||
@@ -1732,7 +1745,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
}
|
||||
|
||||
fi->fh = fh;
|
||||
- err = lo_do_lookup(req, parent, name, &e);
|
||||
+ err = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
}
|
||||
if (lo->cache == CACHE_NONE) {
|
||||
fi->direct_io = 1;
|
||||
--
|
||||
2.26.2
|
||||
|
@ -0,0 +1,299 @@
|
||||
From a3fdbbc7f271bff7d53d0501b29d910ece0b3789 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 4 Feb 2021 15:02:08 +0000
|
||||
Subject: [PATCH 4/6] virtiofsd: prevent opening of special files
|
||||
(CVE-2020-35517)
|
||||
|
||||
A well-behaved FUSE client does not attempt to open special files with
|
||||
FUSE_OPEN because they are handled on the client side (e.g. device nodes
|
||||
are handled by client-side device drivers).
|
||||
|
||||
The check to prevent virtiofsd from opening special files is missing in
|
||||
a few cases, most notably FUSE_OPEN. A malicious client can cause
|
||||
virtiofsd to open a device node, potentially allowing the guest to
|
||||
escape. This can be exploited by a modified guest device driver. It is
|
||||
not exploitable from guest userspace since the guest kernel will handle
|
||||
special files inside the guest instead of sending FUSE requests.
|
||||
|
||||
This patch fixes this issue by introducing the lo_inode_open() function
|
||||
to check the file type before opening it. This is a short-term solution
|
||||
because it does not prevent a compromised virtiofsd process from opening
|
||||
device nodes on the host.
|
||||
|
||||
Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT
|
||||
| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not
|
||||
necessary here. If the file exists and the user did not specify O_EXCL,
|
||||
open it via lo_do_open().
|
||||
|
||||
Reported-by: Alex Xu <alex@alxu.ca>
|
||||
Fixes: CVE-2020-35517
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
|
||||
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20210204150208.367837-4-stefanha@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++-----------
|
||||
1 file changed, 92 insertions(+), 52 deletions(-)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||
index aa35fc6ba5..147b59338a 100644
|
||||
--- a/tools/virtiofsd/passthrough_ll.c
|
||||
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||
@@ -555,6 +555,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino)
|
||||
return fd;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a
|
||||
+ * regular file or a directory.
|
||||
+ *
|
||||
+ * Use this helper function instead of raw openat(2) to prevent security issues
|
||||
+ * when a malicious client opens special files such as block device nodes.
|
||||
+ * Symlink inodes are also rejected since symlinks must already have been
|
||||
+ * traversed on the client side.
|
||||
+ */
|
||||
+static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
+ int open_flags)
|
||||
+{
|
||||
+ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
|
||||
+ int fd;
|
||||
+
|
||||
+ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
|
||||
+ return -EBADF;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
|
||||
+ * that the inode is not a special file but if an external process races
|
||||
+ * with us then symlinks are traversed here. It is not possible to escape
|
||||
+ * the shared directory since it is mounted as "/" though.
|
||||
+ */
|
||||
+ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
|
||||
+ if (fd < 0) {
|
||||
+ return -errno;
|
||||
+ }
|
||||
+ return fd;
|
||||
+}
|
||||
+
|
||||
static void lo_init(void *userdata, struct fuse_conn_info *conn)
|
||||
{
|
||||
struct lo_data *lo = (struct lo_data *)userdata;
|
||||
@@ -684,9 +716,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
||||
if (fi) {
|
||||
truncfd = fd;
|
||||
} else {
|
||||
- sprintf(procname, "%i", ifd);
|
||||
- truncfd = openat(lo->proc_self_fd, procname, O_RDWR);
|
||||
+ truncfd = lo_inode_open(lo, inode, O_RDWR);
|
||||
if (truncfd < 0) {
|
||||
+ errno = -truncfd;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
@@ -848,7 +880,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
struct lo_inode *dir = lo_inode(req, parent);
|
||||
|
||||
if (inodep) {
|
||||
- *inodep = NULL;
|
||||
+ *inodep = NULL; /* in case there is an error */
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1664,19 +1696,26 @@ static void update_open_flags(int writeback, int allow_direct_io,
|
||||
}
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Open a regular file, set up an fd mapping, and fill out the struct
|
||||
+ * fuse_file_info for it. If existing_fd is not negative, use that fd instead
|
||||
+ * opening a new one. Takes ownership of existing_fd.
|
||||
+ *
|
||||
+ * Returns 0 on success or a positive errno.
|
||||
+ */
|
||||
static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
- struct fuse_file_info *fi)
|
||||
+ int existing_fd, struct fuse_file_info *fi)
|
||||
{
|
||||
- char buf[64];
|
||||
ssize_t fh;
|
||||
- int fd;
|
||||
+ int fd = existing_fd;
|
||||
|
||||
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
|
||||
- sprintf(buf, "%i", inode->fd);
|
||||
- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
|
||||
- if (fd == -1) {
|
||||
- return errno;
|
||||
+ if (fd < 0) {
|
||||
+ fd = lo_inode_open(lo, inode, fi->flags);
|
||||
+ if (fd < 0) {
|
||||
+ return -fd;
|
||||
+ }
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&lo->mutex);
|
||||
@@ -1699,9 +1738,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
|
||||
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
mode_t mode, struct fuse_file_info *fi)
|
||||
{
|
||||
- int fd;
|
||||
+ int fd = -1;
|
||||
struct lo_data *lo = lo_data(req);
|
||||
struct lo_inode *parent_inode;
|
||||
+ struct lo_inode *inode = NULL;
|
||||
struct fuse_entry_param e;
|
||||
int err;
|
||||
struct lo_cred old = {};
|
||||
@@ -1727,36 +1767,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||
|
||||
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
|
||||
|
||||
- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
|
||||
- mode);
|
||||
+ /* Try to create a new file but don't open existing files */
|
||||
+ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode);
|
||||
err = fd == -1 ? errno : 0;
|
||||
- lo_restore_cred(&old);
|
||||
|
||||
- if (!err) {
|
||||
- ssize_t fh;
|
||||
+ lo_restore_cred(&old);
|
||||
|
||||
- pthread_mutex_lock(&lo->mutex);
|
||||
- fh = lo_add_fd_mapping(lo, fd);
|
||||
- pthread_mutex_unlock(&lo->mutex);
|
||||
- if (fh == -1) {
|
||||
- close(fd);
|
||||
- err = ENOMEM;
|
||||
- goto out;
|
||||
- }
|
||||
+ /* Ignore the error if file exists and O_EXCL was not given */
|
||||
+ if (err && (err != EEXIST || (fi->flags & O_EXCL))) {
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
- fi->fh = fh;
|
||||
- err = lo_do_lookup(req, parent, name, &e, NULL);
|
||||
+ err = lo_do_lookup(req, parent, name, &e, &inode);
|
||||
+ if (err) {
|
||||
+ goto out;
|
||||
}
|
||||
- if (lo->cache == CACHE_NONE) {
|
||||
- fi->direct_io = 1;
|
||||
- } else if (lo->cache == CACHE_ALWAYS) {
|
||||
- fi->keep_cache = 1;
|
||||
+
|
||||
+ err = lo_do_open(lo, inode, fd, fi);
|
||||
+ fd = -1; /* lo_do_open() takes ownership of fd */
|
||||
+ if (err) {
|
||||
+ /* Undo lo_do_lookup() nlookup ref */
|
||||
+ unref_inode_lolocked(lo, inode, 1);
|
||||
}
|
||||
|
||||
out:
|
||||
+ lo_inode_put(lo, &inode);
|
||||
lo_inode_put(lo, &parent_inode);
|
||||
|
||||
if (err) {
|
||||
+ if (fd >= 0) {
|
||||
+ close(fd);
|
||||
+ }
|
||||
+
|
||||
fuse_reply_err(req, err);
|
||||
} else {
|
||||
fuse_reply_create(req, &e, fi);
|
||||
@@ -1770,7 +1812,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
|
||||
pid_t pid, int *err)
|
||||
{
|
||||
struct lo_inode_plock *plock;
|
||||
- char procname[64];
|
||||
int fd;
|
||||
|
||||
plock =
|
||||
@@ -1787,12 +1828,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
|
||||
}
|
||||
|
||||
/* Open another instance of file which can be used for ofd locks. */
|
||||
- sprintf(procname, "%i", inode->fd);
|
||||
-
|
||||
/* TODO: What if file is not writable? */
|
||||
- fd = openat(lo->proc_self_fd, procname, O_RDWR);
|
||||
- if (fd == -1) {
|
||||
- *err = errno;
|
||||
+ fd = lo_inode_open(lo, inode, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ *err = -fd;
|
||||
free(plock);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1949,7 +1988,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||||
return;
|
||||
}
|
||||
|
||||
- err = lo_do_open(lo, inode, fi);
|
||||
+ err = lo_do_open(lo, inode, -1, fi);
|
||||
lo_inode_put(lo, &inode);
|
||||
if (err) {
|
||||
fuse_reply_err(req, err);
|
||||
@@ -2014,39 +2053,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
|
||||
static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
|
||||
struct fuse_file_info *fi)
|
||||
{
|
||||
+ struct lo_inode *inode = lo_inode(req, ino);
|
||||
+ struct lo_data *lo = lo_data(req);
|
||||
int res;
|
||||
int fd;
|
||||
- char *buf;
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
|
||||
(void *)fi);
|
||||
|
||||
- if (!fi) {
|
||||
- struct lo_data *lo = lo_data(req);
|
||||
-
|
||||
- res = asprintf(&buf, "%i", lo_fd(req, ino));
|
||||
- if (res == -1) {
|
||||
- return (void)fuse_reply_err(req, errno);
|
||||
- }
|
||||
+ if (!inode) {
|
||||
+ fuse_reply_err(req, EBADF);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- fd = openat(lo->proc_self_fd, buf, O_RDWR);
|
||||
- free(buf);
|
||||
- if (fd == -1) {
|
||||
- return (void)fuse_reply_err(req, errno);
|
||||
+ if (!fi) {
|
||||
+ fd = lo_inode_open(lo, inode, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ res = -fd;
|
||||
+ goto out;
|
||||
}
|
||||
} else {
|
||||
fd = lo_fi_fd(req, fi);
|
||||
}
|
||||
|
||||
if (datasync) {
|
||||
- res = fdatasync(fd);
|
||||
+ res = fdatasync(fd) == -1 ? errno : 0;
|
||||
} else {
|
||||
- res = fsync(fd);
|
||||
+ res = fsync(fd) == -1 ? errno : 0;
|
||||
}
|
||||
if (!fi) {
|
||||
close(fd);
|
||||
}
|
||||
- fuse_reply_err(req, res == -1 ? errno : 0);
|
||||
+out:
|
||||
+ lo_inode_put(lo, &inode);
|
||||
+ fuse_reply_err(req, res);
|
||||
}
|
||||
|
||||
static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
|
||||
--
|
||||
2.26.2
|
||||
|
@ -0,0 +1,31 @@
|
||||
From 62124e5080e6f49f294caad60fbee26cc2d39d8f Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Thu, 21 Jan 2021 18:15:40 +0100
|
||||
Subject: [PATCH 5/6] virtiofsd: Add _llseek to the seccomp whitelist
|
||||
|
||||
This is how glibc implements lseek(2) on POWER.
|
||||
|
||||
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20210121171540.1449777-1-groug@kaod.org>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_seccomp.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
|
||||
index a60d7da4b4..1ecf5bf318 100644
|
||||
--- a/tools/virtiofsd/passthrough_seccomp.c
|
||||
+++ b/tools/virtiofsd/passthrough_seccomp.c
|
||||
@@ -65,6 +65,7 @@ static const int syscall_whitelist[] = {
|
||||
SCMP_SYS(linkat),
|
||||
SCMP_SYS(listxattr),
|
||||
SCMP_SYS(lseek),
|
||||
+ SCMP_SYS(_llseek), /* For POWER */
|
||||
SCMP_SYS(madvise),
|
||||
SCMP_SYS(mkdirat),
|
||||
SCMP_SYS(mknodat),
|
||||
--
|
||||
2.26.2
|
||||
|
@ -0,0 +1,33 @@
|
||||
From cf269ff8032392bbdd700e7bfd21823ad9eee3a9 Mon Sep 17 00:00:00 2001
|
||||
From: Greg Kurz <groug@kaod.org>
|
||||
Date: Mon, 1 Feb 2021 20:33:05 +0100
|
||||
Subject: [PATCH 6/6] virtiofsd: Add restart_syscall to the seccomp whitelist
|
||||
|
||||
This is how linux restarts some system calls after SIGSTOP/SIGCONT.
|
||||
This is needed to avoid virtiofsd termination when resuming execution
|
||||
under GDB for example.
|
||||
|
||||
Signed-off-by: Greg Kurz <groug@kaod.org>
|
||||
Message-Id: <20210201193305.136390-1-groug@kaod.org>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
tools/virtiofsd/passthrough_seccomp.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
|
||||
index 1ecf5bf318..ea852e2e33 100644
|
||||
--- a/tools/virtiofsd/passthrough_seccomp.c
|
||||
+++ b/tools/virtiofsd/passthrough_seccomp.c
|
||||
@@ -89,6 +89,7 @@ static const int syscall_whitelist[] = {
|
||||
SCMP_SYS(renameat),
|
||||
SCMP_SYS(renameat2),
|
||||
SCMP_SYS(removexattr),
|
||||
+ SCMP_SYS(restart_syscall),
|
||||
SCMP_SYS(rt_sigaction),
|
||||
SCMP_SYS(rt_sigprocmask),
|
||||
SCMP_SYS(rt_sigreturn),
|
||||
--
|
||||
2.26.2
|
||||
|
@ -0,0 +1,98 @@
|
||||
From 3de89ce9fb5eda46f7cefd70e9090cb7cd7ec803 Mon Sep 17 00:00:00 2001
|
||||
From: Yang Zhong <yang.zhong@intel.com>
|
||||
Date: Wed, 28 Mar 2018 20:14:53 +0800
|
||||
Subject: [PATCH 1/2] 9p: removing coroutines of 9p to increase the I/O
|
||||
performance
|
||||
|
||||
This is a quick workaround; it needs to be fixed properly.
|
||||
|
||||
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
|
||||
---
|
||||
hw/9pfs/9p.c | 12 +++++-------
|
||||
hw/9pfs/9p.h | 6 +++---
|
||||
hw/9pfs/coth.h | 3 +++
|
||||
3 files changed, 11 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
|
||||
index 9e046f7acb..11c8ee08d9 100644
|
||||
--- a/hw/9pfs/9p.c
|
||||
+++ b/hw/9pfs/9p.c
|
||||
@@ -1082,10 +1082,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
|
||||
out_notify:
|
||||
pdu->s->transport->push_and_notify(pdu);
|
||||
|
||||
- /* Now wakeup anybody waiting in flush for this request */
|
||||
- if (!qemu_co_queue_next(&pdu->complete)) {
|
||||
- pdu_free(pdu);
|
||||
- }
|
||||
+ pdu_free(pdu);
|
||||
}
|
||||
|
||||
static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
|
||||
@@ -3997,7 +3994,7 @@ static inline bool is_read_only_op(V9fsPDU *pdu)
|
||||
|
||||
void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
|
||||
{
|
||||
- Coroutine *co;
|
||||
+// Coroutine *co;
|
||||
CoroutineEntry *handler;
|
||||
V9fsState *s = pdu->s;
|
||||
|
||||
@@ -4015,8 +4012,9 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
|
||||
}
|
||||
|
||||
qemu_co_queue_init(&pdu->complete);
|
||||
- co = qemu_coroutine_create(handler, pdu);
|
||||
- qemu_coroutine_enter(co);
|
||||
+ handler(pdu);
|
||||
+ //co = qemu_coroutine_create(handler, pdu);
|
||||
+ //qemu_coroutine_enter(co);
|
||||
}
|
||||
|
||||
/* Returns 0 on success, 1 on failure. */
|
||||
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
|
||||
index b8f72a3bd9..d16bf9d05e 100644
|
||||
--- a/hw/9pfs/9p.h
|
||||
+++ b/hw/9pfs/9p.h
|
||||
@@ -391,21 +391,21 @@ extern int total_open_fd;
|
||||
static inline void v9fs_path_write_lock(V9fsState *s)
|
||||
{
|
||||
if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
|
||||
- qemu_co_rwlock_wrlock(&s->rename_lock);
|
||||
+ // qemu_co_rwlock_wrlock(&s->rename_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void v9fs_path_read_lock(V9fsState *s)
|
||||
{
|
||||
if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
|
||||
- qemu_co_rwlock_rdlock(&s->rename_lock);
|
||||
+ // qemu_co_rwlock_rdlock(&s->rename_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void v9fs_path_unlock(V9fsState *s)
|
||||
{
|
||||
if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
|
||||
- qemu_co_rwlock_unlock(&s->rename_lock);
|
||||
+ // qemu_co_rwlock_unlock(&s->rename_lock);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
|
||||
index c2cdc7a9ea..0fe971d1f5 100644
|
||||
--- a/hw/9pfs/coth.h
|
||||
+++ b/hw/9pfs/coth.h
|
||||
@@ -46,6 +46,9 @@
|
||||
qemu_coroutine_yield(); \
|
||||
} while (0)
|
||||
|
||||
+#undef v9fs_co_run_in_worker
|
||||
+#define v9fs_co_run_in_worker(code_block) do {code_block} while(0);
|
||||
+
|
||||
void co_run_in_worker_bh(void *);
|
||||
int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
|
||||
int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **);
|
||||
--
|
||||
2.21.0
|
||||
|
@ -0,0 +1,65 @@
|
||||
From 22b6a14e0b56989e4ac68377eb1cff7c404e14ae Mon Sep 17 00:00:00 2001
|
||||
From: Halil Pasic <pasic@linux.ibm.com>
|
||||
Date: Thu, 18 Feb 2021 04:40:59 +0100
|
||||
Subject: [PATCH] hw/s390x: fix build for virtio-9p-ccw
|
||||
|
||||
Commit 2c44220d05 ("meson: convert hw/arch*"), which migrated the old
|
||||
Makefile.objs to meson.build accidentally excluded virtio-ccw-9p.c and
|
||||
thus the virtio-9p-ccw device from the build (and potentially also
|
||||
included the file virtio-ccw-blk.c twice in the source set). And since
|
||||
CONFIG_VIRTFS can't be used the way it was used here (see commit
|
||||
2c9dce0196 ("meson: do not use CONFIG_VIRTFS")), the preconditions have
|
||||
to be written differently.
|
||||
|
||||
Let's fix this!
|
||||
|
||||
Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
|
||||
Fixes: 2c44220d05 ("meson: convert hw/arch*")
|
||||
Reported-by: Jakob Naucke <jakob.naucke@ibm.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
|
||||
X-Backport-Note: add the definition of have_virtfs from 69202b406e3
|
||||
Origin: backport, https://lists.nongnu.org/archive/html/qemu-devel/2021-02/msg05793.html
|
||||
Bug-Ubuntu: https://bugs.launchpad.net/bugs/1916230
|
||||
Last-Update: 2021-02-22
|
||||
|
||||
Cherry picked from https://git.launchpad.net/ubuntu/+source/qemu/plain/debian/patches/ubuntu/lp-1916230-hw-s390x-fix-build-for-virtio-9p-ccw.patch?id=e4d879c593bc0f3758248de4e2b4087d9f5dfc7e
|
||||
---
|
||||
hw/s390x/meson.build | 4 +++-
|
||||
meson.build | 5 +++++
|
||||
2 files changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
|
||||
index 2a7818d94b..91495b5631 100644
|
||||
--- a/hw/s390x/meson.build
|
||||
+++ b/hw/s390x/meson.build
|
||||
@@ -40,7 +40,9 @@ virtio_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-ccw-net.c'))
|
||||
virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-ccw-rng.c'))
|
||||
virtio_ss.add(when: 'CONFIG_VIRTIO_SCSI', if_true: files('virtio-ccw-scsi.c'))
|
||||
virtio_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-ccw-serial.c'))
|
||||
-virtio_ss.add(when: ['CONFIG_VIRTIO_9P', 'CONFIG_VIRTFS'], if_true: files('virtio-ccw-blk.c'))
|
||||
+if have_virtfs
|
||||
+ virtio_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-ccw-9p.c'))
|
||||
+endif
|
||||
virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-ccw.c'))
|
||||
virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-ccw.c'))
|
||||
s390x_ss.add_all(when: 'CONFIG_VIRTIO_CCW', if_true: virtio_ss)
|
||||
diff --git a/meson.build b/meson.build
|
||||
index e3386196ba..fcfe4fc0b9 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -777,6 +777,11 @@ endif
|
||||
# config-host.h #
|
||||
#################
|
||||
|
||||
+have_virtfs = (targetos == 'linux' and
|
||||
+ have_system and
|
||||
+ libattr.found() and
|
||||
+ libcap_ng.found())
|
||||
+
|
||||
config_host_data.set_quoted('CONFIG_BINDIR', get_option('prefix') / get_option('bindir'))
|
||||
config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix'))
|
||||
config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir)
|
||||
--
|
||||
2.26.2
|
||||
|
@ -334,6 +334,16 @@ generate_qemu_options() {
|
||||
# Don't build the qemu-io, qemu-nbd and qemu-img tools
|
||||
qemu_options+=(size:--disable-tools)
|
||||
|
||||
# Kata Containers may be configured to use the virtiofs daemon.
|
||||
#
|
||||
# But since QEMU 5.2 the daemon is built as part of the tools set
|
||||
# (disabled with --disable-tools) thus it needs to be explicitly
|
||||
# enabled.
|
||||
if gt_eq "${qemu_version}" "5.2.0" ; then
|
||||
qemu_options+=(functionality:--enable-virtiofsd)
|
||||
qemu_options+=(functionality:--enable-virtfs)
|
||||
fi
|
||||
|
||||
# Don't build linux-user bsd-user
|
||||
qemu_options+=(size:--disable-bsd-user)
|
||||
qemu_options+=(size:--disable-linux-user)
|
||||
@ -461,7 +471,10 @@ generate_qemu_options() {
|
||||
_qemu_cflags=""
|
||||
|
||||
# compile with high level of optimisation
|
||||
_qemu_cflags+=" -O3"
|
||||
# On version 5.2.0 onward the Meson build system warns to not use -O3
|
||||
if ! gt_eq "${qemu_version}" "5.2.0" ; then
|
||||
_qemu_cflags+=" -O3"
|
||||
fi
|
||||
|
||||
# Improve code quality by assuming identical semantics for interposed
|
||||
# symbols.
|
||||
|
@ -19,9 +19,11 @@ qemu_black_list=(
|
||||
*/share/*/*.img
|
||||
*/share/*/keymaps
|
||||
*/share/*/multiboot.bin
|
||||
*/share/*/npcm7xx_bootrom.bin
|
||||
*/share/*/openbios-ppc
|
||||
*/share/*/openbios-sparc32
|
||||
*/share/*/openbios-sparc64
|
||||
*/share/*/opensbi-riscv*
|
||||
*/share/*/palcode-clipper
|
||||
*/share/*/pxe-*
|
||||
*/share/*/QEMU,*
|
||||
|
@ -40,6 +40,7 @@ RUN apt-get --no-install-recommends install -y \
|
||||
libselinux1-dev \
|
||||
libtool \
|
||||
make \
|
||||
ninja-build \
|
||||
pkg-config \
|
||||
libseccomp-dev \
|
||||
libseccomp2 \
|
||||
@ -65,6 +66,5 @@ RUN PREFIX="${PREFIX}" /root/configure-hypervisor.sh -s kata-qemu | xargs ./conf
|
||||
--with-pkgversion=kata-static
|
||||
|
||||
RUN make -j$(nproc)
|
||||
RUN make -j$(nproc) virtiofsd
|
||||
RUN make install DESTDIR="${QEMU_DESTDIR}"
|
||||
RUN /root/static-build/scripts/qemu-build-post.sh
|
||||
|
@ -88,8 +88,8 @@ assets:
|
||||
qemu:
|
||||
description: "VMM that uses KVM"
|
||||
url: "https://github.com/qemu/qemu"
|
||||
version: "5.0.0"
|
||||
tag: "v5.0.0"
|
||||
version: "5.2.0"
|
||||
tag: "v5.2.0"
|
||||
# Do not include any non-full release versions
|
||||
# Break the line *without CR or space being appended*, to appease
|
||||
# yamllint, and note the deliberate ' ' at the end of the expression.
|
||||
|
Loading…
Reference in New Issue
Block a user