diff --git a/tools/packaging/qemu/patches/5.0.x/0004-virtiofsd-Add-o-allow_direct_io-no_allow_direct_io-o.patch b/tools/packaging/qemu/patches/5.0.x/0004-virtiofsd-Add-o-allow_direct_io-no_allow_direct_io-o.patch new file mode 100644 index 0000000000..36c3253e76 --- /dev/null +++ b/tools/packaging/qemu/patches/5.0.x/0004-virtiofsd-Add-o-allow_direct_io-no_allow_direct_io-o.patch @@ -0,0 +1,104 @@ +From f876aae825d77aec1a735ecf5b2bc821eba11913 Mon Sep 17 00:00:00 2001 +From: Jiachen Zhang +Date: Thu, 4 Feb 2021 22:22:03 +0100 +Subject: [PATCH] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options + +Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT +open flag of guest applications will be discarded by virtiofsd. While +this behavior makes it consistent with the virtio-9p scheme when guest +applications use direct I/O, we no longer have any chance to bypass the +host page cache. + +Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o + no_allow_direct_io' option is added, or none of '-o allow_direct_io' or + '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to + 0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' +is added to the starting command-line, 'allow_direct_io' will be set to +1, so that the O_DIRECT flags will be retained and host page cache can +be bypassed. + +Signed-off-by: Jiachen Zhang +Reviewed-by: Stefan Hajnoczi +Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f7b9c1d2..bec60352 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -151,6 +151,7 @@ struct lo_data { + int timeout_set; + int readdirplus_set; + int readdirplus_clear; ++ int allow_direct_io; + struct lo_inode root; + GHashTable *inodes; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ +@@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { + { "norace", offsetof(struct lo_data, norace), 1 }, + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, + { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, ++ { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, ++ { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, + FUSE_OPT_END + }; + static bool use_syslog = false; +@@ -1677,7 +1680,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + fuse_reply_err(req, 0); + } + +-static void update_open_flags(int writeback, struct fuse_file_info *fi) ++static void update_open_flags(int writeback, int allow_direct_io, ++ struct fuse_file_info *fi) + { + /* + * With writeback cache, kernel may send read requests even +@@ -1702,10 +1706,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) + + /* + * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. ++ * cache on host as well. Therefore, we discard it by default ++ * ('-o no_allow_direct_io'). If somebody needs that behavior, ++ * the '-o allow_direct_io' option should be set. + */ +- fi->flags &= ~O_DIRECT; ++ if (!allow_direct_io) { ++ fi->flags &= ~O_DIRECT; ++ } + } + + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1737,7 +1744,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- update_open_flags(lo->writeback, fi); ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); + + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +@@ -1947,7 +1954,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- update_open_flags(lo->writeback, fi); ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); + + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +@@ -2852,6 +2859,7 @@ int main(int argc, char *argv[]) + .debug = 0, + .writeback = 0, + .posix_lock = 1, ++ .allow_direct_io = 0, + .proc_self_fd = -1, + }; + struct lo_map_elem *root_elem; +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.0.x/0005-virtiofsd-extract-lo_do_open-from-lo_open.patch b/tools/packaging/qemu/patches/5.0.x/0005-virtiofsd-extract-lo_do_open-from-lo_open.patch new file mode 100644 index 0000000000..e36c19817b --- /dev/null +++ b/tools/packaging/qemu/patches/5.0.x/0005-virtiofsd-extract-lo_do_open-from-lo_open.patch @@ -0,0 +1,145 @@ +From 7979901047bdad744b1cfaedb1fa99dc4c5738b8 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Feb 2021 20:16:42 +0100 +Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open() + +Both lo_open() and lo_create() have similar code to open a file. Extract +a common lo_do_open() function from lo_open() that will be used by +lo_create() in a later commit. + +Since lo_do_open() does not otherwise need fuse_req_t req, convert +lo_add_fd_mapping() to use struct lo_data *lo instead. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-2-stefanha@redhat.com> +Reviewed-by: Greg Kurz +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index bec60352..6eb6a754 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -442,17 +442,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ +-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) + { + struct lo_map_elem *elem; + +- elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ elem = lo_map_alloc_elem(&lo->fd_map); + if (!elem) { + return -1; + } + + elem->fd = fd; +- return elem - lo_data(req)->fd_map.elems; ++ return elem - lo->fd_map.elems; + } + + /* Assumes lo->mutex is held */ +@@ -1715,6 +1715,38 @@ static void update_open_flags(int writeback, int allow_direct_io, + } + } + ++static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, ++ struct fuse_file_info *fi) ++{ ++ char buf[64]; ++ ssize_t fh; ++ int fd; ++ ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); ++ ++ sprintf(buf, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return errno; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(lo, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ return ENOMEM; ++ } ++ ++ fi->fh = fh; ++ if (lo->cache == CACHE_NONE) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ return 0; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1755,7 +1787,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + ssize_t fh; + + pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); ++ fh = lo_add_fd_mapping(lo, fd); + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +@@ -1946,38 +1978,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- ssize_t fh; +- char buf[64]; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ int err; + + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- update_open_flags(lo->writeback, lo->allow_direct_io, fi); +- +- sprintf(buf, "%i", lo_fd(req, ino)); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); +- } +- +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- fuse_reply_err(req, ENOMEM); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); + return; + } + +- fi->fh = fh; +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ err = lo_do_open(lo, inode, fi); ++ lo_inode_put(lo, &inode); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_open(req, fi); + } +- fuse_reply_open(req, fi); + } + + static void lo_release(fuse_req_t req, fuse_ino_t ino, +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.0.x/0006-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch b/tools/packaging/qemu/patches/5.0.x/0006-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch new file mode 100644 index 0000000000..96c0c9c233 --- /dev/null +++ b/tools/packaging/qemu/patches/5.0.x/0006-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch @@ -0,0 +1,109 @@ +From 6772514121ccfecc0824b8f2ec39377368c564a0 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Feb 2021 15:02:07 +0000 +Subject: [PATCH] virtiofsd: optionally return inode pointer from + lo_do_lookup() + +lo_do_lookup() finds an existing inode or allocates a new one. It +increments nlookup so that the inode stays alive until the client +releases it. + +Existing callers don't need the struct lo_inode so the function doesn't +return it. Extend the function to optionally return the inode. The next +commit will need it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Greg Kurz +Message-Id: <20210204150208.367837-3-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- + 1 file changed, 21 insertions(+), 8 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6eb6a754..422ea75f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -881,11 +881,13 @@ static void posix_locks_value_destroy(gpointer data) + } + + /* +- * Increments nlookup and caller must release refcount using +- * lo_inode_put(&parent). ++ * Increments nlookup on the inode on success. unref_inode_lolocked() must be ++ * called eventually to decrement nlookup again. If inodep is non-NULL, the ++ * inode pointer is stored and the caller must call lo_inode_put(). + */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e, ++ struct lo_inode **inodep) + { + int newfd; + int res; +@@ -894,6 +896,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *inode = NULL; + struct lo_inode *dir = lo_inode(req, parent); + ++ if (inodep) { ++ *inodep = NULL; ++ } ++ + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse + * mount point in guest, but we don't have its inode info in the +@@ -954,7 +960,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +- lo_inode_put(lo, &inode); ++ ++ /* Transfer ownership of inode pointer to caller or drop it */ ++ if (inodep) { ++ *inodep = inode; ++ } else { ++ lo_inode_put(lo, &inode); ++ } ++ + lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, +@@ -989,7 +1002,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + return; + } + +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, name, &e, NULL); + if (err) { + fuse_reply_err(req, err); + } else { +@@ -1097,7 +1110,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- saverr = lo_do_lookup(req, parent, name, &e); ++ saverr = lo_do_lookup(req, parent, name, &e, NULL); + if (saverr) { + goto out; + } +@@ -1598,7 +1611,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + if (plus) { + if (!is_dot_or_dotdot(name)) { +- err = lo_do_lookup(req, ino, name, &e); ++ err = lo_do_lookup(req, ino, name, &e, NULL); + if (err) { + goto error; + } +@@ -1796,7 +1809,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, name, &e, NULL); + } + if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.0.x/0007-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch b/tools/packaging/qemu/patches/5.0.x/0007-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch new file mode 100644 index 0000000000..1eed68edff --- /dev/null +++ b/tools/packaging/qemu/patches/5.0.x/0007-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch @@ -0,0 +1,298 @@ +From 19f9b9c7fb8433830b661441c6646cda42223354 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Feb 2021 20:20:00 +0100 +Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517) + +A well-behaved FUSE client does not attempt to open special files with +FUSE_OPEN because they are handled on the client side (e.g. device nodes +are handled by client-side device drivers). + +The check to prevent virtiofsd from opening special files is missing in +a few cases, most notably FUSE_OPEN. A malicious client can cause +virtiofsd to open a device node, potentially allowing the guest to +escape. This can be exploited by a modified guest device driver. It is +not exploitable from guest userspace since the guest kernel will handle +special files inside the guest instead of sending FUSE requests. + +This patch fixes this issue by introducing the lo_inode_open() function +to check the file type before opening it. This is a short-term solution +because it does not prevent a compromised virtiofsd process from opening +device nodes on the host. + +Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT +| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not +necessary here. If the file exists and the user did not specify O_EXCL, +open it via lo_do_open(). + +Reported-by: Alex Xu +Fixes: CVE-2020-35517 +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Vivek Goyal +Reviewed-by: Greg Kurz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-4-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 52 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 422ea75f..da4310e3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -538,6 +538,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) + return fd; + } + ++/* ++ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a ++ * regular file or a directory. ++ * ++ * Use this helper function instead of raw openat(2) to prevent security issues ++ * when a malicious client opens special files such as block device nodes. ++ * Symlink inodes are also rejected since symlinks must already have been ++ * traversed on the client side. ++ */ ++static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, ++ int open_flags) ++{ ++ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); ++ int fd; ++ ++ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { ++ return -EBADF; ++ } ++ ++ /* ++ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier ++ * that the inode is not a special file but if an external process races ++ * with us then symlinks are traversed here. It is not possible to escape ++ * the shared directory since it is mounted as "/" though. ++ */ ++ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); ++ if (fd < 0) { ++ return -errno; ++ } ++ return fd; ++} ++ + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -791,9 +823,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + truncfd = fd; + } else { +- sprintf(procname, "%i", ifd); +- truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ truncfd = lo_inode_open(lo, inode, O_RDWR); + if (truncfd < 0) { ++ errno = -truncfd; + goto out_err; + } + } +@@ -897,7 +929,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *dir = lo_inode(req, parent); + + if (inodep) { +- *inodep = NULL; ++ *inodep = NULL; /* in case there is an error */ + } + + /* +@@ -1728,19 +1760,26 @@ static void update_open_flags(int writeback, int allow_direct_io, + } + } + ++/* ++ * Open a regular file, set up an fd mapping, and fill out the struct ++ * fuse_file_info for it. If existing_fd is not negative, use that fd instead ++ * opening a new one. Takes ownership of existing_fd. ++ * ++ * Returns 0 on success or a positive errno. ++ */ + static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, +- struct fuse_file_info *fi) ++ int existing_fd, struct fuse_file_info *fi) + { +- char buf[64]; + ssize_t fh; +- int fd; ++ int fd = existing_fd; + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + +- sprintf(buf, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return errno; ++ if (fd < 0) { ++ fd = lo_inode_open(lo, inode, fi->flags); ++ if (fd < 0) { ++ return -fd; ++ } + } + + pthread_mutex_lock(&lo->mutex); +@@ -1763,9 +1802,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +- int fd; ++ int fd = -1; + struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; ++ struct lo_inode *inode = NULL; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1791,36 +1831,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + +- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, +- mode); ++ /* Try to create a new file but don't open existing files */ ++ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); + err = fd == -1 ? errno : 0; +- lo_restore_cred(&old); + +- if (!err) { +- ssize_t fh; ++ lo_restore_cred(&old); + +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(lo, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- err = ENOMEM; +- goto out; +- } ++ /* Ignore the error if file exists and O_EXCL was not given */ ++ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { ++ goto out; ++ } + +- fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e, NULL); ++ err = lo_do_lookup(req, parent, name, &e, &inode); ++ if (err) { ++ goto out; + } +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ ++ err = lo_do_open(lo, inode, fd, fi); ++ fd = -1; /* lo_do_open() takes ownership of fd */ ++ if (err) { ++ /* Undo lo_do_lookup() nlookup ref */ ++ unref_inode_lolocked(lo, inode, 1); + } + + out: ++ lo_inode_put(lo, &inode); + lo_inode_put(lo, &parent_inode); + + if (err) { ++ if (fd >= 0) { ++ close(fd); ++ } ++ + fuse_reply_err(req, err); + } else { + fuse_reply_create(req, &e, fi); +@@ -1834,7 +1876,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + pid_t pid, int *err) + { + struct lo_inode_plock *plock; +- char procname[64]; + int fd; + + plock = +@@ -1851,12 +1892,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + } + + /* Open another instance of file which can be used for ofd locks. */ +- sprintf(procname, "%i", inode->fd); +- + /* TODO: What if file is not writable? */ +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd == -1) { +- *err = errno; ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ *err = -fd; + free(plock); + return NULL; + } +@@ -2003,7 +2042,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return; + } + +- err = lo_do_open(lo, inode, fi); ++ err = lo_do_open(lo, inode, -1, fi); + lo_inode_put(lo, &inode); + if (err) { + fuse_reply_err(req, err); +@@ -2059,39 +2098,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); + int res; + int fd; +- char *buf; + + fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, + (void *)fi); + +- if (!fi) { +- struct lo_data *lo = lo_data(req); +- +- res = asprintf(&buf, "%i", lo_fd(req, ino)); +- if (res == -1) { +- return (void)fuse_reply_err(req, errno); +- } ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } + +- fd = openat(lo->proc_self_fd, buf, O_RDWR); +- free(buf); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); ++ if (!fi) { ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ res = -fd; ++ goto out; + } + } else { + fd = lo_fi_fd(req, fi); + } + + if (datasync) { +- res = fdatasync(fd); ++ res = fdatasync(fd) == -1 ? errno : 0; + } else { +- res = fsync(fd); ++ res = fsync(fd) == -1 ? errno : 0; + } + if (!fi) { + close(fd); + } +- fuse_reply_err(req, res == -1 ? errno : 0); ++out: ++ lo_inode_put(lo, &inode); ++ fuse_reply_err(req, res); + } + + static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.0.x/0008-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch b/tools/packaging/qemu/patches/5.0.x/0008-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch new file mode 100644 index 0000000000..2837775ca1 --- /dev/null +++ b/tools/packaging/qemu/patches/5.0.x/0008-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch @@ -0,0 +1,31 @@ +From a68a49b83f6c2694a080bb3761f566bd84b7ca26 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 4 Feb 2021 20:23:16 +0100 +Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist + +This is how glibc implements lseek(2) on POWER. + +BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692 +Signed-off-by: Greg Kurz +Message-Id: <20210121171540.1449777-1-groug@kaod.org> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/seccomp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 7f93fe87..57642ff8 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -68,6 +68,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(linkat), + SCMP_SYS(listxattr), + SCMP_SYS(lseek), ++ SCMP_SYS(_llseek), /* For POWER */ + SCMP_SYS(madvise), + SCMP_SYS(mkdirat), + SCMP_SYS(mknodat), +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.0.x/0009-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch b/tools/packaging/qemu/patches/5.0.x/0009-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch new file mode 100644 index 0000000000..44803b15b7 --- /dev/null +++ b/tools/packaging/qemu/patches/5.0.x/0009-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch @@ -0,0 +1,33 @@ +From c013c9a1d796d1feae143f02b3c654f0a42f7055 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 4 Feb 2021 20:24:28 +0100 +Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist + +This is how linux restarts some system calls after SIGSTOP/SIGCONT. +This is needed to avoid virtiofsd termination when resuming execution +under GDB for example. + +Signed-off-by: Greg Kurz +Message-Id: <20210201193305.136390-1-groug@kaod.org> +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/seccomp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 57642ff8..004f5026 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -91,6 +91,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(renameat), + SCMP_SYS(renameat2), + SCMP_SYS(removexattr), ++ SCMP_SYS(restart_syscall), + SCMP_SYS(rt_sigaction), + SCMP_SYS(rt_sigprocmask), + SCMP_SYS(rt_sigreturn), +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.1.x/0007-virtiofsd-Add-o-allow_direct_io-no_allow_direct_io-o.patch b/tools/packaging/qemu/patches/5.1.x/0007-virtiofsd-Add-o-allow_direct_io-no_allow_direct_io-o.patch new file mode 100644 index 0000000000..e9178480ae --- /dev/null +++ b/tools/packaging/qemu/patches/5.1.x/0007-virtiofsd-Add-o-allow_direct_io-no_allow_direct_io-o.patch @@ -0,0 +1,104 @@ +From c5c8e946f8200de1c6d917949e98484908b43f21 Mon Sep 17 00:00:00 2001 +From: Jiachen Zhang +Date: Thu, 4 Feb 2021 22:11:28 +0100 +Subject: [PATCH] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options + +Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT +open flag of guest applications will be discarded by virtiofsd. While +this behavior makes it consistent with the virtio-9p scheme when guest +applications use direct I/O, we no longer have any chance to bypass the +host page cache. + +Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o + no_allow_direct_io' option is added, or none of '-o allow_direct_io' or + '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to + 0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' +is added to the starting command-line, 'allow_direct_io' will be set to +1, so that the O_DIRECT flags will be retained and host page cache can +be bypassed. + +Signed-off-by: Jiachen Zhang +Reviewed-by: Stefan Hajnoczi +Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 94e0de2d..289c9985 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -151,6 +151,7 @@ struct lo_data { + int timeout_set; + int readdirplus_set; + int readdirplus_clear; ++ int allow_direct_io; + struct lo_inode root; + GHashTable *inodes; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ +@@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, + { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, ++ { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, ++ { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, + FUSE_OPT_END + }; + static bool use_syslog = false; +@@ -1516,7 +1519,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + fuse_reply_err(req, 0); + } + +-static void update_open_flags(int writeback, struct fuse_file_info *fi) ++static void update_open_flags(int writeback, int allow_direct_io, ++ struct fuse_file_info *fi) + { + /* + * With writeback cache, kernel may send read requests even +@@ -1541,10 +1545,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) + + /* + * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. ++ * cache on host as well. Therefore, we discard it by default ++ * ('-o no_allow_direct_io'). If somebody needs that behavior, ++ * the '-o allow_direct_io' option should be set. + */ +- fi->flags &= ~O_DIRECT; ++ if (!allow_direct_io) { ++ fi->flags &= ~O_DIRECT; ++ } + } + + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1576,7 +1583,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- update_open_flags(lo->writeback, fi); ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); + + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +@@ -1786,7 +1793,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- update_open_flags(lo->writeback, fi); ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); + + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +@@ -2824,6 +2831,7 @@ int main(int argc, char *argv[]) + .debug = 0, + .writeback = 0, + .posix_lock = 1, ++ .allow_direct_io = 0, + .proc_self_fd = -1, + }; + struct lo_map_elem *root_elem; +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.1.x/0008-virtiofsd-extract-lo_do_open-from-lo_open.patch b/tools/packaging/qemu/patches/5.1.x/0008-virtiofsd-extract-lo_do_open-from-lo_open.patch new file mode 100644 index 0000000000..418f0fb55c --- /dev/null +++ b/tools/packaging/qemu/patches/5.1.x/0008-virtiofsd-extract-lo_do_open-from-lo_open.patch @@ -0,0 +1,145 @@ +From 04600a6b1ad6376e6da955a929196b9c19fa7f4a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Feb 2021 20:16:42 +0100 +Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open() + +Both lo_open() and lo_create() have similar code to open a file. Extract +a common lo_do_open() function from lo_open() that will be used by +lo_create() in a later commit. + +Since lo_do_open() does not otherwise need fuse_req_t req, convert +lo_add_fd_mapping() to use struct lo_data *lo instead. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-2-stefanha@redhat.com> +Reviewed-by: Greg Kurz +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 289c9985..eaec0c5a 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -442,17 +442,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ +-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) + { + struct lo_map_elem *elem; + +- elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ elem = lo_map_alloc_elem(&lo->fd_map); + if (!elem) { + return -1; + } + + elem->fd = fd; +- return elem - lo_data(req)->fd_map.elems; ++ return elem - lo->fd_map.elems; + } + + /* Assumes lo->mutex is held */ +@@ -1554,6 +1554,38 @@ static void update_open_flags(int writeback, int allow_direct_io, + } + } + ++static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, ++ struct fuse_file_info *fi) ++{ ++ char buf[64]; ++ ssize_t fh; ++ int fd; ++ ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); ++ ++ sprintf(buf, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return errno; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(lo, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ return ENOMEM; ++ } ++ ++ fi->fh = fh; ++ if (lo->cache == CACHE_NONE) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ return 0; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1594,7 +1626,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + ssize_t fh; + + pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); ++ fh = lo_add_fd_mapping(lo, fd); + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +@@ -1785,38 +1817,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- ssize_t fh; +- char buf[64]; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ int err; + + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- update_open_flags(lo->writeback, lo->allow_direct_io, fi); +- +- sprintf(buf, "%i", lo_fd(req, ino)); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); +- } +- +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- fuse_reply_err(req, ENOMEM); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); + return; + } + +- fi->fh = fh; +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ err = lo_do_open(lo, inode, fi); ++ lo_inode_put(lo, &inode); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_open(req, fi); + } +- fuse_reply_open(req, fi); + } + + static void lo_release(fuse_req_t req, fuse_ino_t ino, +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.1.x/0009-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch b/tools/packaging/qemu/patches/5.1.x/0009-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch new file mode 100644 index 0000000000..938dc0eae8 --- /dev/null +++ b/tools/packaging/qemu/patches/5.1.x/0009-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch @@ -0,0 +1,109 @@ +From 51b4b251442ade9163c9c9f953e5db00340ab3f1 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Feb 2021 15:02:07 +0000 +Subject: [PATCH] virtiofsd: optionally return inode pointer from + lo_do_lookup() + +lo_do_lookup() finds an existing inode or allocates a new one. It +increments nlookup so that the inode stays alive until the client +releases it. + +Existing callers don't need the struct lo_inode so the function doesn't +return it. Extend the function to optionally return the inode. The next +commit will need it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Greg Kurz +Message-Id: <20210204150208.367837-3-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- + 1 file changed, 21 insertions(+), 8 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index eaec0c5a..a874f509 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -752,11 +752,13 @@ static void posix_locks_value_destroy(gpointer data) + } + + /* +- * Increments nlookup and caller must release refcount using +- * lo_inode_put(&parent). ++ * Increments nlookup on the inode on success. unref_inode_lolocked() must be ++ * called eventually to decrement nlookup again. If inodep is non-NULL, the ++ * inode pointer is stored and the caller must call lo_inode_put(). + */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e, ++ struct lo_inode **inodep) + { + int newfd; + int res; +@@ -765,6 +767,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *inode = NULL; + struct lo_inode *dir = lo_inode(req, parent); + ++ if (inodep) { ++ *inodep = NULL; ++ } ++ + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse + * mount point in guest, but we don't have its inode info in the +@@ -825,7 +831,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +- lo_inode_put(lo, &inode); ++ ++ /* Transfer ownership of inode pointer to caller or drop it */ ++ if (inodep) { ++ *inodep = inode; ++ } else { ++ lo_inode_put(lo, &inode); ++ } ++ + lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, +@@ -860,7 +873,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + return; + } + +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, name, &e, NULL); + if (err) { + fuse_reply_err(req, err); + } else { +@@ -968,7 +981,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- saverr = lo_do_lookup(req, parent, name, &e); ++ saverr = lo_do_lookup(req, parent, name, &e, NULL); + if (saverr) { + goto out; + } +@@ -1437,7 +1450,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + if (plus) { + if (!is_dot_or_dotdot(name)) { +- err = lo_do_lookup(req, ino, name, &e); ++ err = lo_do_lookup(req, ino, name, &e, NULL); + if (err) { + goto error; + } +@@ -1635,7 +1648,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, name, &e, NULL); + } + if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.1.x/0010-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch b/tools/packaging/qemu/patches/5.1.x/0010-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch new file mode 100644 index 0000000000..e4ca30d14e --- /dev/null +++ b/tools/packaging/qemu/patches/5.1.x/0010-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch @@ -0,0 +1,298 @@ +From 0d3f3e7b142814bbd6cda096cb52891cca983c92 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Feb 2021 20:20:00 +0100 +Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517) + +A well-behaved FUSE client does not attempt to open special files with +FUSE_OPEN because they are handled on the client side (e.g. device nodes +are handled by client-side device drivers). + +The check to prevent virtiofsd from opening special files is missing in +a few cases, most notably FUSE_OPEN. A malicious client can cause +virtiofsd to open a device node, potentially allowing the guest to +escape. This can be exploited by a modified guest device driver. It is +not exploitable from guest userspace since the guest kernel will handle +special files inside the guest instead of sending FUSE requests. + +This patch fixes this issue by introducing the lo_inode_open() function +to check the file type before opening it. This is a short-term solution +because it does not prevent a compromised virtiofsd process from opening +device nodes on the host. + +Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT +| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not +necessary here. If the file exists and the user did not specify O_EXCL, +open it via lo_do_open(). + +Reported-by: Alex Xu +Fixes: CVE-2020-35517 +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Vivek Goyal +Reviewed-by: Greg Kurz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-4-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 52 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index a874f509..41ff708a 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -538,6 +538,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) + return fd; + } + ++/* ++ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a ++ * regular file or a directory. ++ * ++ * Use this helper function instead of raw openat(2) to prevent security issues ++ * when a malicious client opens special files such as block device nodes. ++ * Symlink inodes are also rejected since symlinks must already have been ++ * traversed on the client side. ++ */ ++static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, ++ int open_flags) ++{ ++ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); ++ int fd; ++ ++ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { ++ return -EBADF; ++ } ++ ++ /* ++ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier ++ * that the inode is not a special file but if an external process races ++ * with us then symlinks are traversed here. It is not possible to escape ++ * the shared directory since it is mounted as "/" though. ++ */ ++ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); ++ if (fd < 0) { ++ return -errno; ++ } ++ return fd; ++} ++ + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -661,9 +693,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + truncfd = fd; + } else { +- sprintf(procname, "%i", ifd); +- truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ truncfd = lo_inode_open(lo, inode, O_RDWR); + if (truncfd < 0) { ++ errno = -truncfd; + goto out_err; + } + } +@@ -768,7 +800,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *dir = lo_inode(req, parent); + + if (inodep) { +- *inodep = NULL; ++ *inodep = NULL; /* in case there is an error */ + } + + /* +@@ -1567,19 +1599,26 @@ static void update_open_flags(int writeback, int allow_direct_io, + } + } + ++/* ++ * Open a regular file, set up an fd mapping, and fill out the struct ++ * fuse_file_info for it. If existing_fd is not negative, use that fd instead ++ * opening a new one. Takes ownership of existing_fd. ++ * ++ * Returns 0 on success or a positive errno. ++ */ + static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, +- struct fuse_file_info *fi) ++ int existing_fd, struct fuse_file_info *fi) + { +- char buf[64]; + ssize_t fh; +- int fd; ++ int fd = existing_fd; + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + +- sprintf(buf, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return errno; ++ if (fd < 0) { ++ fd = lo_inode_open(lo, inode, fi->flags); ++ if (fd < 0) { ++ return -fd; ++ } + } + + pthread_mutex_lock(&lo->mutex); +@@ -1602,9 +1641,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +- int fd; ++ int fd = -1; + struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; ++ struct lo_inode *inode = NULL; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1630,36 +1670,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + +- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, +- mode); ++ /* Try to create a new file but don't open existing files */ ++ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); + err = fd == -1 ? errno : 0; +- lo_restore_cred(&old); + +- if (!err) { +- ssize_t fh; ++ lo_restore_cred(&old); + +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(lo, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- err = ENOMEM; +- goto out; +- } ++ /* Ignore the error if file exists and O_EXCL was not given */ ++ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { ++ goto out; ++ } + +- fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e, NULL); ++ err = lo_do_lookup(req, parent, name, &e, &inode); ++ if (err) { ++ goto out; + } +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ ++ err = lo_do_open(lo, inode, fd, fi); ++ fd = -1; /* lo_do_open() takes ownership of fd */ ++ if (err) { ++ /* Undo lo_do_lookup() nlookup ref */ ++ unref_inode_lolocked(lo, inode, 1); + } + + out: ++ lo_inode_put(lo, &inode); + lo_inode_put(lo, &parent_inode); + + if (err) { ++ if (fd >= 0) { ++ close(fd); ++ } ++ + fuse_reply_err(req, err); + } else { + fuse_reply_create(req, &e, fi); +@@ -1673,7 +1715,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + pid_t pid, int *err) + { + struct lo_inode_plock *plock; +- char procname[64]; + int fd; + + plock = +@@ -1690,12 +1731,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + } + + /* Open another instance of file which can be used for ofd locks. */ +- sprintf(procname, "%i", inode->fd); +- + /* TODO: What if file is not writable? */ +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd == -1) { +- *err = errno; ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ *err = -fd; + free(plock); + return NULL; + } +@@ -1842,7 +1881,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return; + } + +- err = lo_do_open(lo, inode, fi); ++ err = lo_do_open(lo, inode, -1, fi); + lo_inode_put(lo, &inode); + if (err) { + fuse_reply_err(req, err); +@@ -1898,39 +1937,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); + int res; + int fd; +- char *buf; + + fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, + (void *)fi); + +- if (!fi) { +- struct lo_data *lo = lo_data(req); +- +- res = asprintf(&buf, "%i", lo_fd(req, ino)); +- if (res == -1) { +- return (void)fuse_reply_err(req, errno); +- } ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } + +- fd = openat(lo->proc_self_fd, buf, O_RDWR); +- free(buf); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); ++ if (!fi) { ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ res = -fd; ++ goto out; + } + } else { + fd = lo_fi_fd(req, fi); + } + + if (datasync) { +- res = fdatasync(fd); ++ res = fdatasync(fd) == -1 ? errno : 0; + } else { +- res = fsync(fd); ++ res = fsync(fd) == -1 ? errno : 0; + } + if (!fi) { + close(fd); + } +- fuse_reply_err(req, res == -1 ? errno : 0); ++out: ++ lo_inode_put(lo, &inode); ++ fuse_reply_err(req, res); + } + + static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.1.x/0011-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch b/tools/packaging/qemu/patches/5.1.x/0011-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch new file mode 100644 index 0000000000..c3c47b3d08 --- /dev/null +++ b/tools/packaging/qemu/patches/5.1.x/0011-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch @@ -0,0 +1,31 @@ +From 7670d5d5061a8d959a367bdd9d86b6b97a3bf4e9 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 4 Feb 2021 20:23:16 +0100 +Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist + +This is how glibc implements lseek(2) on POWER. + +BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692 +Signed-off-by: Greg Kurz +Message-Id: <20210121171540.1449777-1-groug@kaod.org> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/seccomp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 7f93fe87..57642ff8 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -68,6 +68,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(linkat), + SCMP_SYS(listxattr), + SCMP_SYS(lseek), ++ SCMP_SYS(_llseek), /* For POWER */ + SCMP_SYS(madvise), + SCMP_SYS(mkdirat), + SCMP_SYS(mknodat), +-- +2.29.2 + diff --git a/tools/packaging/qemu/patches/5.1.x/0012-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch b/tools/packaging/qemu/patches/5.1.x/0012-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch new file mode 100644 index 0000000000..65c1c0df33 --- /dev/null +++ b/tools/packaging/qemu/patches/5.1.x/0012-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch @@ -0,0 +1,33 @@ +From 82bc7d7c11ff868ca9b0f663e847f535eab4e3ac Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 4 Feb 2021 20:24:28 +0100 +Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist + +This is how linux restarts some system calls after SIGSTOP/SIGCONT. +This is needed to avoid virtiofsd termination when resuming execution +under GDB for example. + +Signed-off-by: Greg Kurz +Message-Id: <20210201193305.136390-1-groug@kaod.org> +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/seccomp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 57642ff8..004f5026 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -91,6 +91,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(renameat), + SCMP_SYS(renameat2), + SCMP_SYS(removexattr), ++ SCMP_SYS(restart_syscall), + SCMP_SYS(rt_sigaction), + SCMP_SYS(rt_sigprocmask), + SCMP_SYS(rt_sigreturn), +-- +2.29.2 +