mirror of
				https://github.com/kata-containers/kata-containers.git
				synced 2025-10-24 21:51:37 +00:00 
			
		
		
		
	qemu: Add security fixes for CVE-2020-35517
This series is based on https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01787.html, and was kindly brought up by David Gilbert. Fixes: #1361 Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
This commit is contained in:
		| @@ -0,0 +1,104 @@ | |||||||
|  | From f876aae825d77aec1a735ecf5b2bc821eba11913 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||||
|  | Date: Thu, 4 Feb 2021 22:22:03 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options | ||||||
|  |  | ||||||
|  | Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT | ||||||
|  | open flag of guest applications will be discarded by virtiofsd. While | ||||||
|  | this behavior makes it consistent with the virtio-9p scheme when guest | ||||||
|  | applications use direct I/O, we no longer have any chance to bypass the | ||||||
|  | host page cache. | ||||||
|  |  | ||||||
|  | Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o | ||||||
|  |  no_allow_direct_io' option is added, or none of '-o allow_direct_io' or | ||||||
|  |  '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to | ||||||
|  |  0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' | ||||||
|  | is added to the starting command-line, 'allow_direct_io' will be set to | ||||||
|  | 1, so that the O_DIRECT flags will be retained and host page cache can | ||||||
|  | be bypassed. | ||||||
|  |  | ||||||
|  | Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||||
|  | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ | ||||||
|  |  1 file changed, 14 insertions(+), 6 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index f7b9c1d2..bec60352 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -151,6 +151,7 @@ struct lo_data { | ||||||
|  |      int timeout_set; | ||||||
|  |      int readdirplus_set; | ||||||
|  |      int readdirplus_clear; | ||||||
|  | +    int allow_direct_io; | ||||||
|  |      struct lo_inode root; | ||||||
|  |      GHashTable *inodes; /* protected by lo->mutex */ | ||||||
|  |      struct lo_map ino_map; /* protected by lo->mutex */ | ||||||
|  | @@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { | ||||||
|  |      { "norace", offsetof(struct lo_data, norace), 1 }, | ||||||
|  |      { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, | ||||||
|  |      { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, | ||||||
|  | +    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, | ||||||
|  | +    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, | ||||||
|  |      FUSE_OPT_END | ||||||
|  |  }; | ||||||
|  |  static bool use_syslog = false; | ||||||
|  | @@ -1677,7 +1680,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, | ||||||
|  |      fuse_reply_err(req, 0); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||||
|  | +static void update_open_flags(int writeback, int allow_direct_io, | ||||||
|  | +                              struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  |      /* | ||||||
|  |       * With writeback cache, kernel may send read requests even | ||||||
|  | @@ -1702,10 +1706,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||||
|  |   | ||||||
|  |      /* | ||||||
|  |       * O_DIRECT in guest should not necessarily mean bypassing page | ||||||
|  | -     * cache on host as well. If somebody needs that behavior, it | ||||||
|  | -     * probably should be a configuration knob in daemon. | ||||||
|  | +     * cache on host as well. Therefore, we discard it by default | ||||||
|  | +     * ('-o no_allow_direct_io'). If somebody needs that behavior, | ||||||
|  | +     * the '-o allow_direct_io' option should be set. | ||||||
|  |       */ | ||||||
|  | -    fi->flags &= ~O_DIRECT; | ||||||
|  | +    if (!allow_direct_io) { | ||||||
|  | +        fi->flags &= ~O_DIRECT; | ||||||
|  | +    } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  | @@ -1737,7 +1744,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    update_open_flags(lo->writeback, fi); | ||||||
|  | +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  |      fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||||
|  |                  mode); | ||||||
|  | @@ -1947,7 +1954,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||||
|  |               fi->flags); | ||||||
|  |   | ||||||
|  | -    update_open_flags(lo->writeback, fi); | ||||||
|  | +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  |      sprintf(buf, "%i", lo_fd(req, ino)); | ||||||
|  |      fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | @@ -2852,6 +2859,7 @@ int main(int argc, char *argv[]) | ||||||
|  |          .debug = 0, | ||||||
|  |          .writeback = 0, | ||||||
|  |          .posix_lock = 1, | ||||||
|  | +	.allow_direct_io = 0, | ||||||
|  |          .proc_self_fd = -1, | ||||||
|  |      }; | ||||||
|  |      struct lo_map_elem *root_elem; | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,145 @@ | |||||||
|  | From 7979901047bdad744b1cfaedb1fa99dc4c5738b8 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Date: Thu, 4 Feb 2021 20:16:42 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open() | ||||||
|  |  | ||||||
|  | Both lo_open() and lo_create() have similar code to open a file. Extract | ||||||
|  | a common lo_do_open() function from lo_open() that will be used by | ||||||
|  | lo_create() in a later commit. | ||||||
|  |  | ||||||
|  | Since lo_do_open() does not otherwise need fuse_req_t req, convert | ||||||
|  | lo_add_fd_mapping() to use struct lo_data *lo instead. | ||||||
|  |  | ||||||
|  | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Message-Id: <20210204150208.367837-2-stefanha@redhat.com> | ||||||
|  | Reviewed-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ | ||||||
|  |  1 file changed, 46 insertions(+), 27 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index bec60352..6eb6a754 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -442,17 +442,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* Assumes lo->mutex is held */ | ||||||
|  | -static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) | ||||||
|  | +static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) | ||||||
|  |  { | ||||||
|  |      struct lo_map_elem *elem; | ||||||
|  |   | ||||||
|  | -    elem = lo_map_alloc_elem(&lo_data(req)->fd_map); | ||||||
|  | +    elem = lo_map_alloc_elem(&lo->fd_map); | ||||||
|  |      if (!elem) { | ||||||
|  |          return -1; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      elem->fd = fd; | ||||||
|  | -    return elem - lo_data(req)->fd_map.elems; | ||||||
|  | +    return elem - lo->fd_map.elems; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* Assumes lo->mutex is held */ | ||||||
|  | @@ -1715,6 +1715,38 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||||
|  |      } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  | +                      struct fuse_file_info *fi) | ||||||
|  | +{ | ||||||
|  | +    char buf[64]; | ||||||
|  | +    ssize_t fh; | ||||||
|  | +    int fd; | ||||||
|  | + | ||||||
|  | +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  | + | ||||||
|  | +    sprintf(buf, "%i", inode->fd); | ||||||
|  | +    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | +    if (fd == -1) { | ||||||
|  | +        return errno; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  | +    pthread_mutex_lock(&lo->mutex); | ||||||
|  | +    fh = lo_add_fd_mapping(lo, fd); | ||||||
|  | +    pthread_mutex_unlock(&lo->mutex); | ||||||
|  | +    if (fh == -1) { | ||||||
|  | +        close(fd); | ||||||
|  | +        return ENOMEM; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  | +    fi->fh = fh; | ||||||
|  | +    if (lo->cache == CACHE_NONE) { | ||||||
|  | +        fi->direct_io = 1; | ||||||
|  | +    } else if (lo->cache == CACHE_ALWAYS) { | ||||||
|  | +        fi->keep_cache = 1; | ||||||
|  | +    } | ||||||
|  | +    return 0; | ||||||
|  | +} | ||||||
|  | + | ||||||
|  |  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |                        mode_t mode, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | @@ -1755,7 +1787,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          ssize_t fh; | ||||||
|  |   | ||||||
|  |          pthread_mutex_lock(&lo->mutex); | ||||||
|  | -        fh = lo_add_fd_mapping(req, fd); | ||||||
|  | +        fh = lo_add_fd_mapping(lo, fd); | ||||||
|  |          pthread_mutex_unlock(&lo->mutex); | ||||||
|  |          if (fh == -1) { | ||||||
|  |              close(fd); | ||||||
|  | @@ -1946,38 +1978,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||||
|  |   | ||||||
|  |  static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | -    int fd; | ||||||
|  | -    ssize_t fh; | ||||||
|  | -    char buf[64]; | ||||||
|  |      struct lo_data *lo = lo_data(req); | ||||||
|  | +    struct lo_inode *inode = lo_inode(req, ino); | ||||||
|  | +    int err; | ||||||
|  |   | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||||
|  |               fi->flags); | ||||||
|  |   | ||||||
|  | -    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  | - | ||||||
|  | -    sprintf(buf, "%i", lo_fd(req, ino)); | ||||||
|  | -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | -    if (fd == -1) { | ||||||
|  | -        return (void)fuse_reply_err(req, errno); | ||||||
|  | -    } | ||||||
|  | - | ||||||
|  | -    pthread_mutex_lock(&lo->mutex); | ||||||
|  | -    fh = lo_add_fd_mapping(req, fd); | ||||||
|  | -    pthread_mutex_unlock(&lo->mutex); | ||||||
|  | -    if (fh == -1) { | ||||||
|  | -        close(fd); | ||||||
|  | -        fuse_reply_err(req, ENOMEM); | ||||||
|  | +    if (!inode) { | ||||||
|  | +        fuse_reply_err(req, EBADF); | ||||||
|  |          return; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    fi->fh = fh; | ||||||
|  | -    if (lo->cache == CACHE_NONE) { | ||||||
|  | -        fi->direct_io = 1; | ||||||
|  | -    } else if (lo->cache == CACHE_ALWAYS) { | ||||||
|  | -        fi->keep_cache = 1; | ||||||
|  | +    err = lo_do_open(lo, inode, fi); | ||||||
|  | +    lo_inode_put(lo, &inode); | ||||||
|  | +    if (err) { | ||||||
|  | +        fuse_reply_err(req, err); | ||||||
|  | +    } else { | ||||||
|  | +        fuse_reply_open(req, fi); | ||||||
|  |      } | ||||||
|  | -    fuse_reply_open(req, fi); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void lo_release(fuse_req_t req, fuse_ino_t ino, | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,109 @@ | |||||||
|  | From 6772514121ccfecc0824b8f2ec39377368c564a0 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Date: Thu, 4 Feb 2021 15:02:07 +0000 | ||||||
|  | Subject: [PATCH] virtiofsd: optionally return inode pointer from | ||||||
|  |  lo_do_lookup() | ||||||
|  |  | ||||||
|  | lo_do_lookup() finds an existing inode or allocates a new one. It | ||||||
|  | increments nlookup so that the inode stays alive until the client | ||||||
|  | releases it. | ||||||
|  |  | ||||||
|  | Existing callers don't need the struct lo_inode so the function doesn't | ||||||
|  | return it. Extend the function to optionally return the inode. The next | ||||||
|  | commit will need it. | ||||||
|  |  | ||||||
|  | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Reviewed-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Message-Id: <20210204150208.367837-3-stefanha@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- | ||||||
|  |  1 file changed, 21 insertions(+), 8 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index 6eb6a754..422ea75f 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -881,11 +881,13 @@ static void posix_locks_value_destroy(gpointer data) | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* | ||||||
|  | - * Increments nlookup and caller must release refcount using | ||||||
|  | - * lo_inode_put(&parent). | ||||||
|  | + * Increments nlookup on the inode on success. unref_inode_lolocked() must be | ||||||
|  | + * called eventually to decrement nlookup again. If inodep is non-NULL, the | ||||||
|  | + * inode pointer is stored and the caller must call lo_inode_put(). | ||||||
|  |   */ | ||||||
|  |  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  | -                        struct fuse_entry_param *e) | ||||||
|  | +                        struct fuse_entry_param *e, | ||||||
|  | +                        struct lo_inode **inodep) | ||||||
|  |  { | ||||||
|  |      int newfd; | ||||||
|  |      int res; | ||||||
|  | @@ -894,6 +896,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |      struct lo_inode *inode = NULL; | ||||||
|  |      struct lo_inode *dir = lo_inode(req, parent); | ||||||
|  |   | ||||||
|  | +    if (inodep) { | ||||||
|  | +        *inodep = NULL; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  |      /* | ||||||
|  |       * name_to_handle_at() and open_by_handle_at() can reach here with fuse | ||||||
|  |       * mount point in guest, but we don't have its inode info in the | ||||||
|  | @@ -954,7 +960,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          pthread_mutex_unlock(&lo->mutex); | ||||||
|  |      } | ||||||
|  |      e->ino = inode->fuse_ino; | ||||||
|  | -    lo_inode_put(lo, &inode); | ||||||
|  | + | ||||||
|  | +    /* Transfer ownership of inode pointer to caller or drop it */ | ||||||
|  | +    if (inodep) { | ||||||
|  | +        *inodep = inode; | ||||||
|  | +    } else { | ||||||
|  | +        lo_inode_put(lo, &inode); | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  |      lo_inode_put(lo, &dir); | ||||||
|  |   | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent, | ||||||
|  | @@ -989,7 +1002,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) | ||||||
|  |          return; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    err = lo_do_lookup(req, parent, name, &e); | ||||||
|  | +    err = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  |      if (err) { | ||||||
|  |          fuse_reply_err(req, err); | ||||||
|  |      } else { | ||||||
|  | @@ -1097,7 +1110,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    saverr = lo_do_lookup(req, parent, name, &e); | ||||||
|  | +    saverr = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  |      if (saverr) { | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  | @@ -1598,7 +1611,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, | ||||||
|  |   | ||||||
|  |          if (plus) { | ||||||
|  |              if (!is_dot_or_dotdot(name)) { | ||||||
|  | -                err = lo_do_lookup(req, ino, name, &e); | ||||||
|  | +                err = lo_do_lookup(req, ino, name, &e, NULL); | ||||||
|  |                  if (err) { | ||||||
|  |                      goto error; | ||||||
|  |                  } | ||||||
|  | @@ -1796,7 +1809,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          } | ||||||
|  |   | ||||||
|  |          fi->fh = fh; | ||||||
|  | -        err = lo_do_lookup(req, parent, name, &e); | ||||||
|  | +        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  |      } | ||||||
|  |      if (lo->cache == CACHE_NONE) { | ||||||
|  |          fi->direct_io = 1; | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,298 @@ | |||||||
|  | From 19f9b9c7fb8433830b661441c6646cda42223354 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Date: Thu, 4 Feb 2021 20:20:00 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517) | ||||||
|  |  | ||||||
|  | A well-behaved FUSE client does not attempt to open special files with | ||||||
|  | FUSE_OPEN because they are handled on the client side (e.g. device nodes | ||||||
|  | are handled by client-side device drivers). | ||||||
|  |  | ||||||
|  | The check to prevent virtiofsd from opening special files is missing in | ||||||
|  | a few cases, most notably FUSE_OPEN. A malicious client can cause | ||||||
|  | virtiofsd to open a device node, potentially allowing the guest to | ||||||
|  | escape. This can be exploited by a modified guest device driver. It is | ||||||
|  | not exploitable from guest userspace since the guest kernel will handle | ||||||
|  | special files inside the guest instead of sending FUSE requests. | ||||||
|  |  | ||||||
|  | This patch fixes this issue by introducing the lo_inode_open() function | ||||||
|  | to check the file type before opening it. This is a short-term solution | ||||||
|  | because it does not prevent a compromised virtiofsd process from opening | ||||||
|  | device nodes on the host. | ||||||
|  |  | ||||||
|  | Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT | ||||||
|  | | O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not | ||||||
|  | necessary here. If the file exists and the user did not specify O_EXCL, | ||||||
|  | open it via lo_do_open(). | ||||||
|  |  | ||||||
|  | Reported-by: Alex Xu <alex@alxu.ca> | ||||||
|  | Fixes: CVE-2020-35517 | ||||||
|  | Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | Reviewed-by: Vivek Goyal <vgoyal@redhat.com> | ||||||
|  | Reviewed-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Message-Id: <20210204150208.367837-4-stefanha@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- | ||||||
|  |  1 file changed, 92 insertions(+), 52 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index 422ea75f..da4310e3 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -538,6 +538,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) | ||||||
|  |      return fd; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +/* | ||||||
|  | + * Open a file descriptor for an inode. Returns -EBADF if the inode is not a | ||||||
|  | + * regular file or a directory. | ||||||
|  | + * | ||||||
|  | + * Use this helper function instead of raw openat(2) to prevent security issues | ||||||
|  | + * when a malicious client opens special files such as block device nodes. | ||||||
|  | + * Symlink inodes are also rejected since symlinks must already have been | ||||||
|  | + * traversed on the client side. | ||||||
|  | + */ | ||||||
|  | +static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  | +                         int open_flags) | ||||||
|  | +{ | ||||||
|  | +    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); | ||||||
|  | +    int fd; | ||||||
|  | + | ||||||
|  | +    if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { | ||||||
|  | +        return -EBADF; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  | +    /* | ||||||
|  | +     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier | ||||||
|  | +     * that the inode is not a special file but if an external process races | ||||||
|  | +     * with us then symlinks are traversed here. It is not possible to escape | ||||||
|  | +     * the shared directory since it is mounted as "/" though. | ||||||
|  | +     */ | ||||||
|  | +    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); | ||||||
|  | +    if (fd < 0) { | ||||||
|  | +        return -errno; | ||||||
|  | +    } | ||||||
|  | +    return fd; | ||||||
|  | +} | ||||||
|  | + | ||||||
|  |  static void lo_init(void *userdata, struct fuse_conn_info *conn) | ||||||
|  |  { | ||||||
|  |      struct lo_data *lo = (struct lo_data *)userdata; | ||||||
|  | @@ -791,9 +823,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, | ||||||
|  |          if (fi) { | ||||||
|  |              truncfd = fd; | ||||||
|  |          } else { | ||||||
|  | -            sprintf(procname, "%i", ifd); | ||||||
|  | -            truncfd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||||
|  | +            truncfd = lo_inode_open(lo, inode, O_RDWR); | ||||||
|  |              if (truncfd < 0) { | ||||||
|  | +                errno = -truncfd; | ||||||
|  |                  goto out_err; | ||||||
|  |              } | ||||||
|  |          } | ||||||
|  | @@ -897,7 +929,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |      struct lo_inode *dir = lo_inode(req, parent); | ||||||
|  |   | ||||||
|  |      if (inodep) { | ||||||
|  | -        *inodep = NULL; | ||||||
|  | +        *inodep = NULL; /* in case there is an error */ | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      /* | ||||||
|  | @@ -1728,19 +1760,26 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||||
|  |      } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +/* | ||||||
|  | + * Open a regular file, set up an fd mapping, and fill out the struct | ||||||
|  | + * fuse_file_info for it. If existing_fd is not negative, use that fd instead | ||||||
|  | + * opening a new one. Takes ownership of existing_fd. | ||||||
|  | + * | ||||||
|  | + * Returns 0 on success or a positive errno. | ||||||
|  | + */ | ||||||
|  |  static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  | -                      struct fuse_file_info *fi) | ||||||
|  | +                      int existing_fd, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | -    char buf[64]; | ||||||
|  |      ssize_t fh; | ||||||
|  | -    int fd; | ||||||
|  | +    int fd = existing_fd; | ||||||
|  |   | ||||||
|  |      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  | -    sprintf(buf, "%i", inode->fd); | ||||||
|  | -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | -    if (fd == -1) { | ||||||
|  | -        return errno; | ||||||
|  | +    if (fd < 0) { | ||||||
|  | +        fd = lo_inode_open(lo, inode, fi->flags); | ||||||
|  | +        if (fd < 0) { | ||||||
|  | +            return -fd; | ||||||
|  | +        } | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      pthread_mutex_lock(&lo->mutex); | ||||||
|  | @@ -1763,9 +1802,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  |  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |                        mode_t mode, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | -    int fd; | ||||||
|  | +    int fd = -1; | ||||||
|  |      struct lo_data *lo = lo_data(req); | ||||||
|  |      struct lo_inode *parent_inode; | ||||||
|  | +    struct lo_inode *inode = NULL; | ||||||
|  |      struct fuse_entry_param e; | ||||||
|  |      int err; | ||||||
|  |      struct lo_cred old = {}; | ||||||
|  | @@ -1791,36 +1831,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |   | ||||||
|  |      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  | -    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||||
|  | -                mode); | ||||||
|  | +    /* Try to create a new file but don't open existing files */ | ||||||
|  | +    fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); | ||||||
|  |      err = fd == -1 ? errno : 0; | ||||||
|  | -    lo_restore_cred(&old); | ||||||
|  |   | ||||||
|  | -    if (!err) { | ||||||
|  | -        ssize_t fh; | ||||||
|  | +    lo_restore_cred(&old); | ||||||
|  |   | ||||||
|  | -        pthread_mutex_lock(&lo->mutex); | ||||||
|  | -        fh = lo_add_fd_mapping(lo, fd); | ||||||
|  | -        pthread_mutex_unlock(&lo->mutex); | ||||||
|  | -        if (fh == -1) { | ||||||
|  | -            close(fd); | ||||||
|  | -            err = ENOMEM; | ||||||
|  | -            goto out; | ||||||
|  | -        } | ||||||
|  | +    /* Ignore the error if file exists and O_EXCL was not given */ | ||||||
|  | +    if (err && (err != EEXIST || (fi->flags & O_EXCL))) { | ||||||
|  | +        goto out; | ||||||
|  | +    } | ||||||
|  |   | ||||||
|  | -        fi->fh = fh; | ||||||
|  | -        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  | +    err = lo_do_lookup(req, parent, name, &e, &inode); | ||||||
|  | +    if (err) { | ||||||
|  | +        goto out; | ||||||
|  |      } | ||||||
|  | -    if (lo->cache == CACHE_NONE) { | ||||||
|  | -        fi->direct_io = 1; | ||||||
|  | -    } else if (lo->cache == CACHE_ALWAYS) { | ||||||
|  | -        fi->keep_cache = 1; | ||||||
|  | + | ||||||
|  | +    err = lo_do_open(lo, inode, fd, fi); | ||||||
|  | +    fd = -1; /* lo_do_open() takes ownership of fd */ | ||||||
|  | +    if (err) { | ||||||
|  | +        /* Undo lo_do_lookup() nlookup ref */ | ||||||
|  | +        unref_inode_lolocked(lo, inode, 1); | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |  out: | ||||||
|  | +    lo_inode_put(lo, &inode); | ||||||
|  |      lo_inode_put(lo, &parent_inode); | ||||||
|  |   | ||||||
|  |      if (err) { | ||||||
|  | +        if (fd >= 0) { | ||||||
|  | +            close(fd); | ||||||
|  | +        } | ||||||
|  | + | ||||||
|  |          fuse_reply_err(req, err); | ||||||
|  |      } else { | ||||||
|  |          fuse_reply_create(req, &e, fi); | ||||||
|  | @@ -1834,7 +1876,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||||
|  |                                                        pid_t pid, int *err) | ||||||
|  |  { | ||||||
|  |      struct lo_inode_plock *plock; | ||||||
|  | -    char procname[64]; | ||||||
|  |      int fd; | ||||||
|  |   | ||||||
|  |      plock = | ||||||
|  | @@ -1851,12 +1892,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      /* Open another instance of file which can be used for ofd locks. */ | ||||||
|  | -    sprintf(procname, "%i", inode->fd); | ||||||
|  | - | ||||||
|  |      /* TODO: What if file is not writable? */ | ||||||
|  | -    fd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||||
|  | -    if (fd == -1) { | ||||||
|  | -        *err = errno; | ||||||
|  | +    fd = lo_inode_open(lo, inode, O_RDWR); | ||||||
|  | +    if (fd < 0) { | ||||||
|  | +        *err = -fd; | ||||||
|  |          free(plock); | ||||||
|  |          return NULL; | ||||||
|  |      } | ||||||
|  | @@ -2003,7 +2042,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |          return; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    err = lo_do_open(lo, inode, fi); | ||||||
|  | +    err = lo_do_open(lo, inode, -1, fi); | ||||||
|  |      lo_inode_put(lo, &inode); | ||||||
|  |      if (err) { | ||||||
|  |          fuse_reply_err(req, err); | ||||||
|  | @@ -2059,39 +2098,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |  static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||||
|  |                       struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | +    struct lo_inode *inode = lo_inode(req, ino); | ||||||
|  | +    struct lo_data *lo = lo_data(req); | ||||||
|  |      int res; | ||||||
|  |      int fd; | ||||||
|  | -    char *buf; | ||||||
|  |   | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, | ||||||
|  |               (void *)fi); | ||||||
|  |   | ||||||
|  | -    if (!fi) { | ||||||
|  | -        struct lo_data *lo = lo_data(req); | ||||||
|  | - | ||||||
|  | -        res = asprintf(&buf, "%i", lo_fd(req, ino)); | ||||||
|  | -        if (res == -1) { | ||||||
|  | -            return (void)fuse_reply_err(req, errno); | ||||||
|  | -        } | ||||||
|  | +    if (!inode) { | ||||||
|  | +        fuse_reply_err(req, EBADF); | ||||||
|  | +        return; | ||||||
|  | +    } | ||||||
|  |   | ||||||
|  | -        fd = openat(lo->proc_self_fd, buf, O_RDWR); | ||||||
|  | -        free(buf); | ||||||
|  | -        if (fd == -1) { | ||||||
|  | -            return (void)fuse_reply_err(req, errno); | ||||||
|  | +    if (!fi) { | ||||||
|  | +        fd = lo_inode_open(lo, inode, O_RDWR); | ||||||
|  | +        if (fd < 0) { | ||||||
|  | +            res = -fd; | ||||||
|  | +            goto out; | ||||||
|  |          } | ||||||
|  |      } else { | ||||||
|  |          fd = lo_fi_fd(req, fi); | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      if (datasync) { | ||||||
|  | -        res = fdatasync(fd); | ||||||
|  | +        res = fdatasync(fd) == -1 ? errno : 0; | ||||||
|  |      } else { | ||||||
|  | -        res = fsync(fd); | ||||||
|  | +        res = fsync(fd) == -1 ? errno : 0; | ||||||
|  |      } | ||||||
|  |      if (!fi) { | ||||||
|  |          close(fd); | ||||||
|  |      } | ||||||
|  | -    fuse_reply_err(req, res == -1 ? errno : 0); | ||||||
|  | +out: | ||||||
|  | +    lo_inode_put(lo, &inode); | ||||||
|  | +    fuse_reply_err(req, res); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,31 @@ | |||||||
|  | From a68a49b83f6c2694a080bb3761f566bd84b7ca26 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Greg Kurz <groug@kaod.org> | ||||||
|  | Date: Thu, 4 Feb 2021 20:23:16 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist | ||||||
|  |  | ||||||
|  | This is how glibc implements lseek(2) on POWER. | ||||||
|  |  | ||||||
|  | BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692 | ||||||
|  | Signed-off-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Message-Id: <20210121171540.1449777-1-groug@kaod.org> | ||||||
|  | Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/seccomp.c | 1 + | ||||||
|  |  1 file changed, 1 insertion(+) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||||
|  | index 7f93fe87..57642ff8 100644 | ||||||
|  | --- a/tools/virtiofsd/seccomp.c | ||||||
|  | +++ b/tools/virtiofsd/seccomp.c | ||||||
|  | @@ -68,6 +68,7 @@ static const int syscall_whitelist[] = { | ||||||
|  |      SCMP_SYS(linkat), | ||||||
|  |      SCMP_SYS(listxattr), | ||||||
|  |      SCMP_SYS(lseek), | ||||||
|  | +    SCMP_SYS(_llseek), /* For POWER */ | ||||||
|  |      SCMP_SYS(madvise), | ||||||
|  |      SCMP_SYS(mkdirat), | ||||||
|  |      SCMP_SYS(mknodat), | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,33 @@ | |||||||
|  | From c013c9a1d796d1feae143f02b3c654f0a42f7055 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Greg Kurz <groug@kaod.org> | ||||||
|  | Date: Thu, 4 Feb 2021 20:24:28 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist | ||||||
|  |  | ||||||
|  | This is how linux restarts some system calls after SIGSTOP/SIGCONT. | ||||||
|  | This is needed to avoid virtiofsd termination when resuming execution | ||||||
|  | under GDB for example. | ||||||
|  |  | ||||||
|  | Signed-off-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Message-Id: <20210201193305.136390-1-groug@kaod.org> | ||||||
|  | Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/seccomp.c | 1 + | ||||||
|  |  1 file changed, 1 insertion(+) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||||
|  | index 57642ff8..004f5026 100644 | ||||||
|  | --- a/tools/virtiofsd/seccomp.c | ||||||
|  | +++ b/tools/virtiofsd/seccomp.c | ||||||
|  | @@ -91,6 +91,7 @@ static const int syscall_whitelist[] = { | ||||||
|  |      SCMP_SYS(renameat), | ||||||
|  |      SCMP_SYS(renameat2), | ||||||
|  |      SCMP_SYS(removexattr), | ||||||
|  | +    SCMP_SYS(restart_syscall), | ||||||
|  |      SCMP_SYS(rt_sigaction), | ||||||
|  |      SCMP_SYS(rt_sigprocmask), | ||||||
|  |      SCMP_SYS(rt_sigreturn), | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,104 @@ | |||||||
|  | From c5c8e946f8200de1c6d917949e98484908b43f21 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||||
|  | Date: Thu, 4 Feb 2021 22:11:28 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options | ||||||
|  |  | ||||||
|  | Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT | ||||||
|  | open flag of guest applications will be discarded by virtiofsd. While | ||||||
|  | this behavior makes it consistent with the virtio-9p scheme when guest | ||||||
|  | applications use direct I/O, we no longer have any chance to bypass the | ||||||
|  | host page cache. | ||||||
|  |  | ||||||
|  | Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o | ||||||
|  |  no_allow_direct_io' option is added, or none of '-o allow_direct_io' or | ||||||
|  |  '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to | ||||||
|  |  0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' | ||||||
|  | is added to the starting command-line, 'allow_direct_io' will be set to | ||||||
|  | 1, so that the O_DIRECT flags will be retained and host page cache can | ||||||
|  | be bypassed. | ||||||
|  |  | ||||||
|  | Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||||
|  | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ | ||||||
|  |  1 file changed, 14 insertions(+), 6 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index 94e0de2d..289c9985 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -151,6 +151,7 @@ struct lo_data { | ||||||
|  |      int timeout_set; | ||||||
|  |      int readdirplus_set; | ||||||
|  |      int readdirplus_clear; | ||||||
|  | +    int allow_direct_io; | ||||||
|  |      struct lo_inode root; | ||||||
|  |      GHashTable *inodes; /* protected by lo->mutex */ | ||||||
|  |      struct lo_map ino_map; /* protected by lo->mutex */ | ||||||
|  | @@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { | ||||||
|  |      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, | ||||||
|  |      { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, | ||||||
|  |      { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, | ||||||
|  | +    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, | ||||||
|  | +    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, | ||||||
|  |      FUSE_OPT_END | ||||||
|  |  }; | ||||||
|  |  static bool use_syslog = false; | ||||||
|  | @@ -1516,7 +1519,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, | ||||||
|  |      fuse_reply_err(req, 0); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||||
|  | +static void update_open_flags(int writeback, int allow_direct_io, | ||||||
|  | +                              struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  |      /* | ||||||
|  |       * With writeback cache, kernel may send read requests even | ||||||
|  | @@ -1541,10 +1545,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||||
|  |   | ||||||
|  |      /* | ||||||
|  |       * O_DIRECT in guest should not necessarily mean bypassing page | ||||||
|  | -     * cache on host as well. If somebody needs that behavior, it | ||||||
|  | -     * probably should be a configuration knob in daemon. | ||||||
|  | +     * cache on host as well. Therefore, we discard it by default | ||||||
|  | +     * ('-o no_allow_direct_io'). If somebody needs that behavior, | ||||||
|  | +     * the '-o allow_direct_io' option should be set. | ||||||
|  |       */ | ||||||
|  | -    fi->flags &= ~O_DIRECT; | ||||||
|  | +    if (!allow_direct_io) { | ||||||
|  | +        fi->flags &= ~O_DIRECT; | ||||||
|  | +    } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  | @@ -1576,7 +1583,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    update_open_flags(lo->writeback, fi); | ||||||
|  | +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  |      fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||||
|  |                  mode); | ||||||
|  | @@ -1786,7 +1793,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||||
|  |               fi->flags); | ||||||
|  |   | ||||||
|  | -    update_open_flags(lo->writeback, fi); | ||||||
|  | +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  |      sprintf(buf, "%i", lo_fd(req, ino)); | ||||||
|  |      fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | @@ -2824,6 +2831,7 @@ int main(int argc, char *argv[]) | ||||||
|  |          .debug = 0, | ||||||
|  |          .writeback = 0, | ||||||
|  |          .posix_lock = 1, | ||||||
|  | +	.allow_direct_io = 0, | ||||||
|  |          .proc_self_fd = -1, | ||||||
|  |      }; | ||||||
|  |      struct lo_map_elem *root_elem; | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,145 @@ | |||||||
|  | From 04600a6b1ad6376e6da955a929196b9c19fa7f4a Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Date: Thu, 4 Feb 2021 20:16:42 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open() | ||||||
|  |  | ||||||
|  | Both lo_open() and lo_create() have similar code to open a file. Extract | ||||||
|  | a common lo_do_open() function from lo_open() that will be used by | ||||||
|  | lo_create() in a later commit. | ||||||
|  |  | ||||||
|  | Since lo_do_open() does not otherwise need fuse_req_t req, convert | ||||||
|  | lo_add_fd_mapping() to use struct lo_data *lo instead. | ||||||
|  |  | ||||||
|  | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Message-Id: <20210204150208.367837-2-stefanha@redhat.com> | ||||||
|  | Reviewed-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ | ||||||
|  |  1 file changed, 46 insertions(+), 27 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index 289c9985..eaec0c5a 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -442,17 +442,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* Assumes lo->mutex is held */ | ||||||
|  | -static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) | ||||||
|  | +static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) | ||||||
|  |  { | ||||||
|  |      struct lo_map_elem *elem; | ||||||
|  |   | ||||||
|  | -    elem = lo_map_alloc_elem(&lo_data(req)->fd_map); | ||||||
|  | +    elem = lo_map_alloc_elem(&lo->fd_map); | ||||||
|  |      if (!elem) { | ||||||
|  |          return -1; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      elem->fd = fd; | ||||||
|  | -    return elem - lo_data(req)->fd_map.elems; | ||||||
|  | +    return elem - lo->fd_map.elems; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* Assumes lo->mutex is held */ | ||||||
|  | @@ -1554,6 +1554,38 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||||
|  |      } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  | +                      struct fuse_file_info *fi) | ||||||
|  | +{ | ||||||
|  | +    char buf[64]; | ||||||
|  | +    ssize_t fh; | ||||||
|  | +    int fd; | ||||||
|  | + | ||||||
|  | +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  | + | ||||||
|  | +    sprintf(buf, "%i", inode->fd); | ||||||
|  | +    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | +    if (fd == -1) { | ||||||
|  | +        return errno; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  | +    pthread_mutex_lock(&lo->mutex); | ||||||
|  | +    fh = lo_add_fd_mapping(lo, fd); | ||||||
|  | +    pthread_mutex_unlock(&lo->mutex); | ||||||
|  | +    if (fh == -1) { | ||||||
|  | +        close(fd); | ||||||
|  | +        return ENOMEM; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  | +    fi->fh = fh; | ||||||
|  | +    if (lo->cache == CACHE_NONE) { | ||||||
|  | +        fi->direct_io = 1; | ||||||
|  | +    } else if (lo->cache == CACHE_ALWAYS) { | ||||||
|  | +        fi->keep_cache = 1; | ||||||
|  | +    } | ||||||
|  | +    return 0; | ||||||
|  | +} | ||||||
|  | + | ||||||
|  |  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |                        mode_t mode, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | @@ -1594,7 +1626,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          ssize_t fh; | ||||||
|  |   | ||||||
|  |          pthread_mutex_lock(&lo->mutex); | ||||||
|  | -        fh = lo_add_fd_mapping(req, fd); | ||||||
|  | +        fh = lo_add_fd_mapping(lo, fd); | ||||||
|  |          pthread_mutex_unlock(&lo->mutex); | ||||||
|  |          if (fh == -1) { | ||||||
|  |              close(fd); | ||||||
|  | @@ -1785,38 +1817,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||||
|  |   | ||||||
|  |  static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | -    int fd; | ||||||
|  | -    ssize_t fh; | ||||||
|  | -    char buf[64]; | ||||||
|  |      struct lo_data *lo = lo_data(req); | ||||||
|  | +    struct lo_inode *inode = lo_inode(req, ino); | ||||||
|  | +    int err; | ||||||
|  |   | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||||
|  |               fi->flags); | ||||||
|  |   | ||||||
|  | -    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  | - | ||||||
|  | -    sprintf(buf, "%i", lo_fd(req, ino)); | ||||||
|  | -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | -    if (fd == -1) { | ||||||
|  | -        return (void)fuse_reply_err(req, errno); | ||||||
|  | -    } | ||||||
|  | - | ||||||
|  | -    pthread_mutex_lock(&lo->mutex); | ||||||
|  | -    fh = lo_add_fd_mapping(req, fd); | ||||||
|  | -    pthread_mutex_unlock(&lo->mutex); | ||||||
|  | -    if (fh == -1) { | ||||||
|  | -        close(fd); | ||||||
|  | -        fuse_reply_err(req, ENOMEM); | ||||||
|  | +    if (!inode) { | ||||||
|  | +        fuse_reply_err(req, EBADF); | ||||||
|  |          return; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    fi->fh = fh; | ||||||
|  | -    if (lo->cache == CACHE_NONE) { | ||||||
|  | -        fi->direct_io = 1; | ||||||
|  | -    } else if (lo->cache == CACHE_ALWAYS) { | ||||||
|  | -        fi->keep_cache = 1; | ||||||
|  | +    err = lo_do_open(lo, inode, fi); | ||||||
|  | +    lo_inode_put(lo, &inode); | ||||||
|  | +    if (err) { | ||||||
|  | +        fuse_reply_err(req, err); | ||||||
|  | +    } else { | ||||||
|  | +        fuse_reply_open(req, fi); | ||||||
|  |      } | ||||||
|  | -    fuse_reply_open(req, fi); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void lo_release(fuse_req_t req, fuse_ino_t ino, | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,109 @@ | |||||||
|  | From 51b4b251442ade9163c9c9f953e5db00340ab3f1 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Date: Thu, 4 Feb 2021 15:02:07 +0000 | ||||||
|  | Subject: [PATCH] virtiofsd: optionally return inode pointer from | ||||||
|  |  lo_do_lookup() | ||||||
|  |  | ||||||
|  | lo_do_lookup() finds an existing inode or allocates a new one. It | ||||||
|  | increments nlookup so that the inode stays alive until the client | ||||||
|  | releases it. | ||||||
|  |  | ||||||
|  | Existing callers don't need the struct lo_inode so the function doesn't | ||||||
|  | return it. Extend the function to optionally return the inode. The next | ||||||
|  | commit will need it. | ||||||
|  |  | ||||||
|  | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Reviewed-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Message-Id: <20210204150208.367837-3-stefanha@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- | ||||||
|  |  1 file changed, 21 insertions(+), 8 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index eaec0c5a..a874f509 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -752,11 +752,13 @@ static void posix_locks_value_destroy(gpointer data) | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* | ||||||
|  | - * Increments nlookup and caller must release refcount using | ||||||
|  | - * lo_inode_put(&parent). | ||||||
|  | + * Increments nlookup on the inode on success. unref_inode_lolocked() must be | ||||||
|  | + * called eventually to decrement nlookup again. If inodep is non-NULL, the | ||||||
|  | + * inode pointer is stored and the caller must call lo_inode_put(). | ||||||
|  |   */ | ||||||
|  |  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  | -                        struct fuse_entry_param *e) | ||||||
|  | +                        struct fuse_entry_param *e, | ||||||
|  | +                        struct lo_inode **inodep) | ||||||
|  |  { | ||||||
|  |      int newfd; | ||||||
|  |      int res; | ||||||
|  | @@ -765,6 +767,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |      struct lo_inode *inode = NULL; | ||||||
|  |      struct lo_inode *dir = lo_inode(req, parent); | ||||||
|  |   | ||||||
|  | +    if (inodep) { | ||||||
|  | +        *inodep = NULL; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  |      /* | ||||||
|  |       * name_to_handle_at() and open_by_handle_at() can reach here with fuse | ||||||
|  |       * mount point in guest, but we don't have its inode info in the | ||||||
|  | @@ -825,7 +831,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          pthread_mutex_unlock(&lo->mutex); | ||||||
|  |      } | ||||||
|  |      e->ino = inode->fuse_ino; | ||||||
|  | -    lo_inode_put(lo, &inode); | ||||||
|  | + | ||||||
|  | +    /* Transfer ownership of inode pointer to caller or drop it */ | ||||||
|  | +    if (inodep) { | ||||||
|  | +        *inodep = inode; | ||||||
|  | +    } else { | ||||||
|  | +        lo_inode_put(lo, &inode); | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  |      lo_inode_put(lo, &dir); | ||||||
|  |   | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent, | ||||||
|  | @@ -860,7 +873,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) | ||||||
|  |          return; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    err = lo_do_lookup(req, parent, name, &e); | ||||||
|  | +    err = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  |      if (err) { | ||||||
|  |          fuse_reply_err(req, err); | ||||||
|  |      } else { | ||||||
|  | @@ -968,7 +981,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    saverr = lo_do_lookup(req, parent, name, &e); | ||||||
|  | +    saverr = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  |      if (saverr) { | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  | @@ -1437,7 +1450,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, | ||||||
|  |   | ||||||
|  |          if (plus) { | ||||||
|  |              if (!is_dot_or_dotdot(name)) { | ||||||
|  | -                err = lo_do_lookup(req, ino, name, &e); | ||||||
|  | +                err = lo_do_lookup(req, ino, name, &e, NULL); | ||||||
|  |                  if (err) { | ||||||
|  |                      goto error; | ||||||
|  |                  } | ||||||
|  | @@ -1635,7 +1648,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |          } | ||||||
|  |   | ||||||
|  |          fi->fh = fh; | ||||||
|  | -        err = lo_do_lookup(req, parent, name, &e); | ||||||
|  | +        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  |      } | ||||||
|  |      if (lo->cache == CACHE_NONE) { | ||||||
|  |          fi->direct_io = 1; | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,298 @@ | |||||||
|  | From 0d3f3e7b142814bbd6cda096cb52891cca983c92 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Date: Thu, 4 Feb 2021 20:20:00 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517) | ||||||
|  |  | ||||||
|  | A well-behaved FUSE client does not attempt to open special files with | ||||||
|  | FUSE_OPEN because they are handled on the client side (e.g. device nodes | ||||||
|  | are handled by client-side device drivers). | ||||||
|  |  | ||||||
|  | The check to prevent virtiofsd from opening special files is missing in | ||||||
|  | a few cases, most notably FUSE_OPEN. A malicious client can cause | ||||||
|  | virtiofsd to open a device node, potentially allowing the guest to | ||||||
|  | escape. This can be exploited by a modified guest device driver. It is | ||||||
|  | not exploitable from guest userspace since the guest kernel will handle | ||||||
|  | special files inside the guest instead of sending FUSE requests. | ||||||
|  |  | ||||||
|  | This patch fixes this issue by introducing the lo_inode_open() function | ||||||
|  | to check the file type before opening it. This is a short-term solution | ||||||
|  | because it does not prevent a compromised virtiofsd process from opening | ||||||
|  | device nodes on the host. | ||||||
|  |  | ||||||
|  | Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT | ||||||
|  | | O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not | ||||||
|  | necessary here. If the file exists and the user did not specify O_EXCL, | ||||||
|  | open it via lo_do_open(). | ||||||
|  |  | ||||||
|  | Reported-by: Alex Xu <alex@alxu.ca> | ||||||
|  | Fixes: CVE-2020-35517 | ||||||
|  | Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | Reviewed-by: Vivek Goyal <vgoyal@redhat.com> | ||||||
|  | Reviewed-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Message-Id: <20210204150208.367837-4-stefanha@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- | ||||||
|  |  1 file changed, 92 insertions(+), 52 deletions(-) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | index a874f509..41ff708a 100644 | ||||||
|  | --- a/tools/virtiofsd/passthrough_ll.c | ||||||
|  | +++ b/tools/virtiofsd/passthrough_ll.c | ||||||
|  | @@ -538,6 +538,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) | ||||||
|  |      return fd; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +/* | ||||||
|  | + * Open a file descriptor for an inode. Returns -EBADF if the inode is not a | ||||||
|  | + * regular file or a directory. | ||||||
|  | + * | ||||||
|  | + * Use this helper function instead of raw openat(2) to prevent security issues | ||||||
|  | + * when a malicious client opens special files such as block device nodes. | ||||||
|  | + * Symlink inodes are also rejected since symlinks must already have been | ||||||
|  | + * traversed on the client side. | ||||||
|  | + */ | ||||||
|  | +static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  | +                         int open_flags) | ||||||
|  | +{ | ||||||
|  | +    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); | ||||||
|  | +    int fd; | ||||||
|  | + | ||||||
|  | +    if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { | ||||||
|  | +        return -EBADF; | ||||||
|  | +    } | ||||||
|  | + | ||||||
|  | +    /* | ||||||
|  | +     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier | ||||||
|  | +     * that the inode is not a special file but if an external process races | ||||||
|  | +     * with us then symlinks are traversed here. It is not possible to escape | ||||||
|  | +     * the shared directory since it is mounted as "/" though. | ||||||
|  | +     */ | ||||||
|  | +    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); | ||||||
|  | +    if (fd < 0) { | ||||||
|  | +        return -errno; | ||||||
|  | +    } | ||||||
|  | +    return fd; | ||||||
|  | +} | ||||||
|  | + | ||||||
|  |  static void lo_init(void *userdata, struct fuse_conn_info *conn) | ||||||
|  |  { | ||||||
|  |      struct lo_data *lo = (struct lo_data *)userdata; | ||||||
|  | @@ -661,9 +693,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, | ||||||
|  |          if (fi) { | ||||||
|  |              truncfd = fd; | ||||||
|  |          } else { | ||||||
|  | -            sprintf(procname, "%i", ifd); | ||||||
|  | -            truncfd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||||
|  | +            truncfd = lo_inode_open(lo, inode, O_RDWR); | ||||||
|  |              if (truncfd < 0) { | ||||||
|  | +                errno = -truncfd; | ||||||
|  |                  goto out_err; | ||||||
|  |              } | ||||||
|  |          } | ||||||
|  | @@ -768,7 +800,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |      struct lo_inode *dir = lo_inode(req, parent); | ||||||
|  |   | ||||||
|  |      if (inodep) { | ||||||
|  | -        *inodep = NULL; | ||||||
|  | +        *inodep = NULL; /* in case there is an error */ | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      /* | ||||||
|  | @@ -1567,19 +1599,26 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||||
|  |      } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +/* | ||||||
|  | + * Open a regular file, set up an fd mapping, and fill out the struct | ||||||
|  | + * fuse_file_info for it. If existing_fd is not negative, use that fd instead | ||||||
|  | + * opening a new one. Takes ownership of existing_fd. | ||||||
|  | + * | ||||||
|  | + * Returns 0 on success or a positive errno. | ||||||
|  | + */ | ||||||
|  |  static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  | -                      struct fuse_file_info *fi) | ||||||
|  | +                      int existing_fd, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | -    char buf[64]; | ||||||
|  |      ssize_t fh; | ||||||
|  | -    int fd; | ||||||
|  | +    int fd = existing_fd; | ||||||
|  |   | ||||||
|  |      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  | -    sprintf(buf, "%i", inode->fd); | ||||||
|  | -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||||
|  | -    if (fd == -1) { | ||||||
|  | -        return errno; | ||||||
|  | +    if (fd < 0) { | ||||||
|  | +        fd = lo_inode_open(lo, inode, fi->flags); | ||||||
|  | +        if (fd < 0) { | ||||||
|  | +            return -fd; | ||||||
|  | +        } | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      pthread_mutex_lock(&lo->mutex); | ||||||
|  | @@ -1602,9 +1641,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||||
|  |  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |                        mode_t mode, struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | -    int fd; | ||||||
|  | +    int fd = -1; | ||||||
|  |      struct lo_data *lo = lo_data(req); | ||||||
|  |      struct lo_inode *parent_inode; | ||||||
|  | +    struct lo_inode *inode = NULL; | ||||||
|  |      struct fuse_entry_param e; | ||||||
|  |      int err; | ||||||
|  |      struct lo_cred old = {}; | ||||||
|  | @@ -1630,36 +1670,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||||
|  |   | ||||||
|  |      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||||
|  |   | ||||||
|  | -    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||||
|  | -                mode); | ||||||
|  | +    /* Try to create a new file but don't open existing files */ | ||||||
|  | +    fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); | ||||||
|  |      err = fd == -1 ? errno : 0; | ||||||
|  | -    lo_restore_cred(&old); | ||||||
|  |   | ||||||
|  | -    if (!err) { | ||||||
|  | -        ssize_t fh; | ||||||
|  | +    lo_restore_cred(&old); | ||||||
|  |   | ||||||
|  | -        pthread_mutex_lock(&lo->mutex); | ||||||
|  | -        fh = lo_add_fd_mapping(lo, fd); | ||||||
|  | -        pthread_mutex_unlock(&lo->mutex); | ||||||
|  | -        if (fh == -1) { | ||||||
|  | -            close(fd); | ||||||
|  | -            err = ENOMEM; | ||||||
|  | -            goto out; | ||||||
|  | -        } | ||||||
|  | +    /* Ignore the error if file exists and O_EXCL was not given */ | ||||||
|  | +    if (err && (err != EEXIST || (fi->flags & O_EXCL))) { | ||||||
|  | +        goto out; | ||||||
|  | +    } | ||||||
|  |   | ||||||
|  | -        fi->fh = fh; | ||||||
|  | -        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||||
|  | +    err = lo_do_lookup(req, parent, name, &e, &inode); | ||||||
|  | +    if (err) { | ||||||
|  | +        goto out; | ||||||
|  |      } | ||||||
|  | -    if (lo->cache == CACHE_NONE) { | ||||||
|  | -        fi->direct_io = 1; | ||||||
|  | -    } else if (lo->cache == CACHE_ALWAYS) { | ||||||
|  | -        fi->keep_cache = 1; | ||||||
|  | + | ||||||
|  | +    err = lo_do_open(lo, inode, fd, fi); | ||||||
|  | +    fd = -1; /* lo_do_open() takes ownership of fd */ | ||||||
|  | +    if (err) { | ||||||
|  | +        /* Undo lo_do_lookup() nlookup ref */ | ||||||
|  | +        unref_inode_lolocked(lo, inode, 1); | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |  out: | ||||||
|  | +    lo_inode_put(lo, &inode); | ||||||
|  |      lo_inode_put(lo, &parent_inode); | ||||||
|  |   | ||||||
|  |      if (err) { | ||||||
|  | +        if (fd >= 0) { | ||||||
|  | +            close(fd); | ||||||
|  | +        } | ||||||
|  | + | ||||||
|  |          fuse_reply_err(req, err); | ||||||
|  |      } else { | ||||||
|  |          fuse_reply_create(req, &e, fi); | ||||||
|  | @@ -1673,7 +1715,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||||
|  |                                                        pid_t pid, int *err) | ||||||
|  |  { | ||||||
|  |      struct lo_inode_plock *plock; | ||||||
|  | -    char procname[64]; | ||||||
|  |      int fd; | ||||||
|  |   | ||||||
|  |      plock = | ||||||
|  | @@ -1690,12 +1731,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      /* Open another instance of file which can be used for ofd locks. */ | ||||||
|  | -    sprintf(procname, "%i", inode->fd); | ||||||
|  | - | ||||||
|  |      /* TODO: What if file is not writable? */ | ||||||
|  | -    fd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||||
|  | -    if (fd == -1) { | ||||||
|  | -        *err = errno; | ||||||
|  | +    fd = lo_inode_open(lo, inode, O_RDWR); | ||||||
|  | +    if (fd < 0) { | ||||||
|  | +        *err = -fd; | ||||||
|  |          free(plock); | ||||||
|  |          return NULL; | ||||||
|  |      } | ||||||
|  | @@ -1842,7 +1881,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |          return; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    err = lo_do_open(lo, inode, fi); | ||||||
|  | +    err = lo_do_open(lo, inode, -1, fi); | ||||||
|  |      lo_inode_put(lo, &inode); | ||||||
|  |      if (err) { | ||||||
|  |          fuse_reply_err(req, err); | ||||||
|  | @@ -1898,39 +1937,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||||
|  |  static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||||
|  |                       struct fuse_file_info *fi) | ||||||
|  |  { | ||||||
|  | +    struct lo_inode *inode = lo_inode(req, ino); | ||||||
|  | +    struct lo_data *lo = lo_data(req); | ||||||
|  |      int res; | ||||||
|  |      int fd; | ||||||
|  | -    char *buf; | ||||||
|  |   | ||||||
|  |      fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, | ||||||
|  |               (void *)fi); | ||||||
|  |   | ||||||
|  | -    if (!fi) { | ||||||
|  | -        struct lo_data *lo = lo_data(req); | ||||||
|  | - | ||||||
|  | -        res = asprintf(&buf, "%i", lo_fd(req, ino)); | ||||||
|  | -        if (res == -1) { | ||||||
|  | -            return (void)fuse_reply_err(req, errno); | ||||||
|  | -        } | ||||||
|  | +    if (!inode) { | ||||||
|  | +        fuse_reply_err(req, EBADF); | ||||||
|  | +        return; | ||||||
|  | +    } | ||||||
|  |   | ||||||
|  | -        fd = openat(lo->proc_self_fd, buf, O_RDWR); | ||||||
|  | -        free(buf); | ||||||
|  | -        if (fd == -1) { | ||||||
|  | -            return (void)fuse_reply_err(req, errno); | ||||||
|  | +    if (!fi) { | ||||||
|  | +        fd = lo_inode_open(lo, inode, O_RDWR); | ||||||
|  | +        if (fd < 0) { | ||||||
|  | +            res = -fd; | ||||||
|  | +            goto out; | ||||||
|  |          } | ||||||
|  |      } else { | ||||||
|  |          fd = lo_fi_fd(req, fi); | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |      if (datasync) { | ||||||
|  | -        res = fdatasync(fd); | ||||||
|  | +        res = fdatasync(fd) == -1 ? errno : 0; | ||||||
|  |      } else { | ||||||
|  | -        res = fsync(fd); | ||||||
|  | +        res = fsync(fd) == -1 ? errno : 0; | ||||||
|  |      } | ||||||
|  |      if (!fi) { | ||||||
|  |          close(fd); | ||||||
|  |      } | ||||||
|  | -    fuse_reply_err(req, res == -1 ? errno : 0); | ||||||
|  | +out: | ||||||
|  | +    lo_inode_put(lo, &inode); | ||||||
|  | +    fuse_reply_err(req, res); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,31 @@ | |||||||
|  | From 7670d5d5061a8d959a367bdd9d86b6b97a3bf4e9 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Greg Kurz <groug@kaod.org> | ||||||
|  | Date: Thu, 4 Feb 2021 20:23:16 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist | ||||||
|  |  | ||||||
|  | This is how glibc implements lseek(2) on POWER. | ||||||
|  |  | ||||||
|  | BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692 | ||||||
|  | Signed-off-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Message-Id: <20210121171540.1449777-1-groug@kaod.org> | ||||||
|  | Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/seccomp.c | 1 + | ||||||
|  |  1 file changed, 1 insertion(+) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||||
|  | index 7f93fe87..57642ff8 100644 | ||||||
|  | --- a/tools/virtiofsd/seccomp.c | ||||||
|  | +++ b/tools/virtiofsd/seccomp.c | ||||||
|  | @@ -68,6 +68,7 @@ static const int syscall_whitelist[] = { | ||||||
|  |      SCMP_SYS(linkat), | ||||||
|  |      SCMP_SYS(listxattr), | ||||||
|  |      SCMP_SYS(lseek), | ||||||
|  | +    SCMP_SYS(_llseek), /* For POWER */ | ||||||
|  |      SCMP_SYS(madvise), | ||||||
|  |      SCMP_SYS(mkdirat), | ||||||
|  |      SCMP_SYS(mknodat), | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
| @@ -0,0 +1,33 @@ | |||||||
|  | From 82bc7d7c11ff868ca9b0f663e847f535eab4e3ac Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Greg Kurz <groug@kaod.org> | ||||||
|  | Date: Thu, 4 Feb 2021 20:24:28 +0100 | ||||||
|  | Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist | ||||||
|  |  | ||||||
|  | This is how linux restarts some system calls after SIGSTOP/SIGCONT. | ||||||
|  | This is needed to avoid virtiofsd termination when resuming execution | ||||||
|  | under GDB for example. | ||||||
|  |  | ||||||
|  | Signed-off-by: Greg Kurz <groug@kaod.org> | ||||||
|  | Message-Id: <20210201193305.136390-1-groug@kaod.org> | ||||||
|  | Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||||
|  | Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||||
|  | --- | ||||||
|  |  tools/virtiofsd/seccomp.c | 1 + | ||||||
|  |  1 file changed, 1 insertion(+) | ||||||
|  |  | ||||||
|  | diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||||
|  | index 57642ff8..004f5026 100644 | ||||||
|  | --- a/tools/virtiofsd/seccomp.c | ||||||
|  | +++ b/tools/virtiofsd/seccomp.c | ||||||
|  | @@ -91,6 +91,7 @@ static const int syscall_whitelist[] = { | ||||||
|  |      SCMP_SYS(renameat), | ||||||
|  |      SCMP_SYS(renameat2), | ||||||
|  |      SCMP_SYS(removexattr), | ||||||
|  | +    SCMP_SYS(restart_syscall), | ||||||
|  |      SCMP_SYS(rt_sigaction), | ||||||
|  |      SCMP_SYS(rt_sigprocmask), | ||||||
|  |      SCMP_SYS(rt_sigreturn), | ||||||
|  | --  | ||||||
|  | 2.29.2 | ||||||
|  |  | ||||||
		Reference in New Issue
	
	Block a user