mirror of
				https://github.com/kata-containers/kata-containers.git
				synced 2025-10-24 21:51:37 +00:00 
			
		
		
		
	qemu: Add security fixes for CVE-2020-35517
This series is based on https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01787.html, and was kindly brought up by David Gilbert. Fixes: #1361 Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
This commit is contained in:
		| @@ -0,0 +1,104 @@ | ||||
| From f876aae825d77aec1a735ecf5b2bc821eba11913 Mon Sep 17 00:00:00 2001 | ||||
| From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||
| Date: Thu, 4 Feb 2021 22:22:03 +0100 | ||||
| Subject: [PATCH] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options | ||||
|  | ||||
| Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT | ||||
| open flag of guest applications will be discarded by virtiofsd. While | ||||
| this behavior makes it consistent with the virtio-9p scheme when guest | ||||
| applications use direct I/O, we no longer have any chance to bypass the | ||||
| host page cache. | ||||
|  | ||||
| Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o | ||||
|  no_allow_direct_io' option is added, or none of '-o allow_direct_io' or | ||||
|  '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to | ||||
|  0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' | ||||
| is added to the starting command-line, 'allow_direct_io' will be set to | ||||
| 1, so that the O_DIRECT flags will be retained and host page cache can | ||||
| be bypassed. | ||||
|  | ||||
| Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||
| Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ | ||||
|  1 file changed, 14 insertions(+), 6 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index f7b9c1d2..bec60352 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -151,6 +151,7 @@ struct lo_data { | ||||
|      int timeout_set; | ||||
|      int readdirplus_set; | ||||
|      int readdirplus_clear; | ||||
| +    int allow_direct_io; | ||||
|      struct lo_inode root; | ||||
|      GHashTable *inodes; /* protected by lo->mutex */ | ||||
|      struct lo_map ino_map; /* protected by lo->mutex */ | ||||
| @@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { | ||||
|      { "norace", offsetof(struct lo_data, norace), 1 }, | ||||
|      { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, | ||||
|      { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, | ||||
| +    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, | ||||
| +    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, | ||||
|      FUSE_OPT_END | ||||
|  }; | ||||
|  static bool use_syslog = false; | ||||
| @@ -1677,7 +1680,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, | ||||
|      fuse_reply_err(req, 0); | ||||
|  } | ||||
|   | ||||
| -static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||
| +static void update_open_flags(int writeback, int allow_direct_io, | ||||
| +                              struct fuse_file_info *fi) | ||||
|  { | ||||
|      /* | ||||
|       * With writeback cache, kernel may send read requests even | ||||
| @@ -1702,10 +1706,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||
|   | ||||
|      /* | ||||
|       * O_DIRECT in guest should not necessarily mean bypassing page | ||||
| -     * cache on host as well. If somebody needs that behavior, it | ||||
| -     * probably should be a configuration knob in daemon. | ||||
| +     * cache on host as well. Therefore, we discard it by default | ||||
| +     * ('-o no_allow_direct_io'). If somebody needs that behavior, | ||||
| +     * the '-o allow_direct_io' option should be set. | ||||
|       */ | ||||
| -    fi->flags &= ~O_DIRECT; | ||||
| +    if (!allow_direct_io) { | ||||
| +        fi->flags &= ~O_DIRECT; | ||||
| +    } | ||||
|  } | ||||
|   | ||||
|  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
| @@ -1737,7 +1744,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          goto out; | ||||
|      } | ||||
|   | ||||
| -    update_open_flags(lo->writeback, fi); | ||||
| +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
|      fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||
|                  mode); | ||||
| @@ -1947,7 +1954,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||
|               fi->flags); | ||||
|   | ||||
| -    update_open_flags(lo->writeback, fi); | ||||
| +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
|      sprintf(buf, "%i", lo_fd(req, ino)); | ||||
|      fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| @@ -2852,6 +2859,7 @@ int main(int argc, char *argv[]) | ||||
|          .debug = 0, | ||||
|          .writeback = 0, | ||||
|          .posix_lock = 1, | ||||
| +	.allow_direct_io = 0, | ||||
|          .proc_self_fd = -1, | ||||
|      }; | ||||
|      struct lo_map_elem *root_elem; | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,145 @@ | ||||
| From 7979901047bdad744b1cfaedb1fa99dc4c5738b8 Mon Sep 17 00:00:00 2001 | ||||
| From: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Date: Thu, 4 Feb 2021 20:16:42 +0100 | ||||
| Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open() | ||||
|  | ||||
| Both lo_open() and lo_create() have similar code to open a file. Extract | ||||
| a common lo_do_open() function from lo_open() that will be used by | ||||
| lo_create() in a later commit. | ||||
|  | ||||
| Since lo_do_open() does not otherwise need fuse_req_t req, convert | ||||
| lo_add_fd_mapping() to use struct lo_data *lo instead. | ||||
|  | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-Id: <20210204150208.367837-2-stefanha@redhat.com> | ||||
| Reviewed-by: Greg Kurz <groug@kaod.org> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ | ||||
|  1 file changed, 46 insertions(+), 27 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index bec60352..6eb6a754 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -442,17 +442,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) | ||||
|  } | ||||
|   | ||||
|  /* Assumes lo->mutex is held */ | ||||
| -static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) | ||||
| +static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) | ||||
|  { | ||||
|      struct lo_map_elem *elem; | ||||
|   | ||||
| -    elem = lo_map_alloc_elem(&lo_data(req)->fd_map); | ||||
| +    elem = lo_map_alloc_elem(&lo->fd_map); | ||||
|      if (!elem) { | ||||
|          return -1; | ||||
|      } | ||||
|   | ||||
|      elem->fd = fd; | ||||
| -    return elem - lo_data(req)->fd_map.elems; | ||||
| +    return elem - lo->fd_map.elems; | ||||
|  } | ||||
|   | ||||
|  /* Assumes lo->mutex is held */ | ||||
| @@ -1715,6 +1715,38 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||
|      } | ||||
|  } | ||||
|   | ||||
| +static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||
| +                      struct fuse_file_info *fi) | ||||
| +{ | ||||
| +    char buf[64]; | ||||
| +    ssize_t fh; | ||||
| +    int fd; | ||||
| + | ||||
| +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
| + | ||||
| +    sprintf(buf, "%i", inode->fd); | ||||
| +    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| +    if (fd == -1) { | ||||
| +        return errno; | ||||
| +    } | ||||
| + | ||||
| +    pthread_mutex_lock(&lo->mutex); | ||||
| +    fh = lo_add_fd_mapping(lo, fd); | ||||
| +    pthread_mutex_unlock(&lo->mutex); | ||||
| +    if (fh == -1) { | ||||
| +        close(fd); | ||||
| +        return ENOMEM; | ||||
| +    } | ||||
| + | ||||
| +    fi->fh = fh; | ||||
| +    if (lo->cache == CACHE_NONE) { | ||||
| +        fi->direct_io = 1; | ||||
| +    } else if (lo->cache == CACHE_ALWAYS) { | ||||
| +        fi->keep_cache = 1; | ||||
| +    } | ||||
| +    return 0; | ||||
| +} | ||||
| + | ||||
|  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|                        mode_t mode, struct fuse_file_info *fi) | ||||
|  { | ||||
| @@ -1755,7 +1787,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          ssize_t fh; | ||||
|   | ||||
|          pthread_mutex_lock(&lo->mutex); | ||||
| -        fh = lo_add_fd_mapping(req, fd); | ||||
| +        fh = lo_add_fd_mapping(lo, fd); | ||||
|          pthread_mutex_unlock(&lo->mutex); | ||||
|          if (fh == -1) { | ||||
|              close(fd); | ||||
| @@ -1946,38 +1978,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||
|   | ||||
|  static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|  { | ||||
| -    int fd; | ||||
| -    ssize_t fh; | ||||
| -    char buf[64]; | ||||
|      struct lo_data *lo = lo_data(req); | ||||
| +    struct lo_inode *inode = lo_inode(req, ino); | ||||
| +    int err; | ||||
|   | ||||
|      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||
|               fi->flags); | ||||
|   | ||||
| -    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
| - | ||||
| -    sprintf(buf, "%i", lo_fd(req, ino)); | ||||
| -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| -    if (fd == -1) { | ||||
| -        return (void)fuse_reply_err(req, errno); | ||||
| -    } | ||||
| - | ||||
| -    pthread_mutex_lock(&lo->mutex); | ||||
| -    fh = lo_add_fd_mapping(req, fd); | ||||
| -    pthread_mutex_unlock(&lo->mutex); | ||||
| -    if (fh == -1) { | ||||
| -        close(fd); | ||||
| -        fuse_reply_err(req, ENOMEM); | ||||
| +    if (!inode) { | ||||
| +        fuse_reply_err(req, EBADF); | ||||
|          return; | ||||
|      } | ||||
|   | ||||
| -    fi->fh = fh; | ||||
| -    if (lo->cache == CACHE_NONE) { | ||||
| -        fi->direct_io = 1; | ||||
| -    } else if (lo->cache == CACHE_ALWAYS) { | ||||
| -        fi->keep_cache = 1; | ||||
| +    err = lo_do_open(lo, inode, fi); | ||||
| +    lo_inode_put(lo, &inode); | ||||
| +    if (err) { | ||||
| +        fuse_reply_err(req, err); | ||||
| +    } else { | ||||
| +        fuse_reply_open(req, fi); | ||||
|      } | ||||
| -    fuse_reply_open(req, fi); | ||||
|  } | ||||
|   | ||||
|  static void lo_release(fuse_req_t req, fuse_ino_t ino, | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,109 @@ | ||||
| From 6772514121ccfecc0824b8f2ec39377368c564a0 Mon Sep 17 00:00:00 2001 | ||||
| From: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Date: Thu, 4 Feb 2021 15:02:07 +0000 | ||||
| Subject: [PATCH] virtiofsd: optionally return inode pointer from | ||||
|  lo_do_lookup() | ||||
|  | ||||
| lo_do_lookup() finds an existing inode or allocates a new one. It | ||||
| increments nlookup so that the inode stays alive until the client | ||||
| releases it. | ||||
|  | ||||
| Existing callers don't need the struct lo_inode so the function doesn't | ||||
| return it. Extend the function to optionally return the inode. The next | ||||
| commit will need it. | ||||
|  | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Reviewed-by: Greg Kurz <groug@kaod.org> | ||||
| Message-Id: <20210204150208.367837-3-stefanha@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- | ||||
|  1 file changed, 21 insertions(+), 8 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index 6eb6a754..422ea75f 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -881,11 +881,13 @@ static void posix_locks_value_destroy(gpointer data) | ||||
|  } | ||||
|   | ||||
|  /* | ||||
| - * Increments nlookup and caller must release refcount using | ||||
| - * lo_inode_put(&parent). | ||||
| + * Increments nlookup on the inode on success. unref_inode_lolocked() must be | ||||
| + * called eventually to decrement nlookup again. If inodep is non-NULL, the | ||||
| + * inode pointer is stored and the caller must call lo_inode_put(). | ||||
|   */ | ||||
|  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
| -                        struct fuse_entry_param *e) | ||||
| +                        struct fuse_entry_param *e, | ||||
| +                        struct lo_inode **inodep) | ||||
|  { | ||||
|      int newfd; | ||||
|      int res; | ||||
| @@ -894,6 +896,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|      struct lo_inode *inode = NULL; | ||||
|      struct lo_inode *dir = lo_inode(req, parent); | ||||
|   | ||||
| +    if (inodep) { | ||||
| +        *inodep = NULL; | ||||
| +    } | ||||
| + | ||||
|      /* | ||||
|       * name_to_handle_at() and open_by_handle_at() can reach here with fuse | ||||
|       * mount point in guest, but we don't have its inode info in the | ||||
| @@ -954,7 +960,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          pthread_mutex_unlock(&lo->mutex); | ||||
|      } | ||||
|      e->ino = inode->fuse_ino; | ||||
| -    lo_inode_put(lo, &inode); | ||||
| + | ||||
| +    /* Transfer ownership of inode pointer to caller or drop it */ | ||||
| +    if (inodep) { | ||||
| +        *inodep = inode; | ||||
| +    } else { | ||||
| +        lo_inode_put(lo, &inode); | ||||
| +    } | ||||
| + | ||||
|      lo_inode_put(lo, &dir); | ||||
|   | ||||
|      fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent, | ||||
| @@ -989,7 +1002,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) | ||||
|          return; | ||||
|      } | ||||
|   | ||||
| -    err = lo_do_lookup(req, parent, name, &e); | ||||
| +    err = lo_do_lookup(req, parent, name, &e, NULL); | ||||
|      if (err) { | ||||
|          fuse_reply_err(req, err); | ||||
|      } else { | ||||
| @@ -1097,7 +1110,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, | ||||
|          goto out; | ||||
|      } | ||||
|   | ||||
| -    saverr = lo_do_lookup(req, parent, name, &e); | ||||
| +    saverr = lo_do_lookup(req, parent, name, &e, NULL); | ||||
|      if (saverr) { | ||||
|          goto out; | ||||
|      } | ||||
| @@ -1598,7 +1611,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, | ||||
|   | ||||
|          if (plus) { | ||||
|              if (!is_dot_or_dotdot(name)) { | ||||
| -                err = lo_do_lookup(req, ino, name, &e); | ||||
| +                err = lo_do_lookup(req, ino, name, &e, NULL); | ||||
|                  if (err) { | ||||
|                      goto error; | ||||
|                  } | ||||
| @@ -1796,7 +1809,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          } | ||||
|   | ||||
|          fi->fh = fh; | ||||
| -        err = lo_do_lookup(req, parent, name, &e); | ||||
| +        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||
|      } | ||||
|      if (lo->cache == CACHE_NONE) { | ||||
|          fi->direct_io = 1; | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,298 @@ | ||||
| From 19f9b9c7fb8433830b661441c6646cda42223354 Mon Sep 17 00:00:00 2001 | ||||
| From: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Date: Thu, 4 Feb 2021 20:20:00 +0100 | ||||
| Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517) | ||||
|  | ||||
| A well-behaved FUSE client does not attempt to open special files with | ||||
| FUSE_OPEN because they are handled on the client side (e.g. device nodes | ||||
| are handled by client-side device drivers). | ||||
|  | ||||
| The check to prevent virtiofsd from opening special files is missing in | ||||
| a few cases, most notably FUSE_OPEN. A malicious client can cause | ||||
| virtiofsd to open a device node, potentially allowing the guest to | ||||
| escape. This can be exploited by a modified guest device driver. It is | ||||
| not exploitable from guest userspace since the guest kernel will handle | ||||
| special files inside the guest instead of sending FUSE requests. | ||||
|  | ||||
| This patch fixes this issue by introducing the lo_inode_open() function | ||||
| to check the file type before opening it. This is a short-term solution | ||||
| because it does not prevent a compromised virtiofsd process from opening | ||||
| device nodes on the host. | ||||
|  | ||||
| Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT | ||||
| | O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not | ||||
| necessary here. If the file exists and the user did not specify O_EXCL, | ||||
| open it via lo_do_open(). | ||||
|  | ||||
| Reported-by: Alex Xu <alex@alxu.ca> | ||||
| Fixes: CVE-2020-35517 | ||||
| Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| Reviewed-by: Vivek Goyal <vgoyal@redhat.com> | ||||
| Reviewed-by: Greg Kurz <groug@kaod.org> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-Id: <20210204150208.367837-4-stefanha@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- | ||||
|  1 file changed, 92 insertions(+), 52 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index 422ea75f..da4310e3 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -538,6 +538,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) | ||||
|      return fd; | ||||
|  } | ||||
|   | ||||
| +/* | ||||
| + * Open a file descriptor for an inode. Returns -EBADF if the inode is not a | ||||
| + * regular file or a directory. | ||||
| + * | ||||
| + * Use this helper function instead of raw openat(2) to prevent security issues | ||||
| + * when a malicious client opens special files such as block device nodes. | ||||
| + * Symlink inodes are also rejected since symlinks must already have been | ||||
| + * traversed on the client side. | ||||
| + */ | ||||
| +static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, | ||||
| +                         int open_flags) | ||||
| +{ | ||||
| +    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); | ||||
| +    int fd; | ||||
| + | ||||
| +    if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { | ||||
| +        return -EBADF; | ||||
| +    } | ||||
| + | ||||
| +    /* | ||||
| +     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier | ||||
| +     * that the inode is not a special file but if an external process races | ||||
| +     * with us then symlinks are traversed here. It is not possible to escape | ||||
| +     * the shared directory since it is mounted as "/" though. | ||||
| +     */ | ||||
| +    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); | ||||
| +    if (fd < 0) { | ||||
| +        return -errno; | ||||
| +    } | ||||
| +    return fd; | ||||
| +} | ||||
| + | ||||
|  static void lo_init(void *userdata, struct fuse_conn_info *conn) | ||||
|  { | ||||
|      struct lo_data *lo = (struct lo_data *)userdata; | ||||
| @@ -791,9 +823,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, | ||||
|          if (fi) { | ||||
|              truncfd = fd; | ||||
|          } else { | ||||
| -            sprintf(procname, "%i", ifd); | ||||
| -            truncfd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||
| +            truncfd = lo_inode_open(lo, inode, O_RDWR); | ||||
|              if (truncfd < 0) { | ||||
| +                errno = -truncfd; | ||||
|                  goto out_err; | ||||
|              } | ||||
|          } | ||||
| @@ -897,7 +929,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|      struct lo_inode *dir = lo_inode(req, parent); | ||||
|   | ||||
|      if (inodep) { | ||||
| -        *inodep = NULL; | ||||
| +        *inodep = NULL; /* in case there is an error */ | ||||
|      } | ||||
|   | ||||
|      /* | ||||
| @@ -1728,19 +1760,26 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||
|      } | ||||
|  } | ||||
|   | ||||
| +/* | ||||
| + * Open a regular file, set up an fd mapping, and fill out the struct | ||||
| + * fuse_file_info for it. If existing_fd is not negative, use that fd instead | ||||
| + * opening a new one. Takes ownership of existing_fd. | ||||
| + * | ||||
| + * Returns 0 on success or a positive errno. | ||||
| + */ | ||||
|  static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||
| -                      struct fuse_file_info *fi) | ||||
| +                      int existing_fd, struct fuse_file_info *fi) | ||||
|  { | ||||
| -    char buf[64]; | ||||
|      ssize_t fh; | ||||
| -    int fd; | ||||
| +    int fd = existing_fd; | ||||
|   | ||||
|      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
| -    sprintf(buf, "%i", inode->fd); | ||||
| -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| -    if (fd == -1) { | ||||
| -        return errno; | ||||
| +    if (fd < 0) { | ||||
| +        fd = lo_inode_open(lo, inode, fi->flags); | ||||
| +        if (fd < 0) { | ||||
| +            return -fd; | ||||
| +        } | ||||
|      } | ||||
|   | ||||
|      pthread_mutex_lock(&lo->mutex); | ||||
| @@ -1763,9 +1802,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||
|  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|                        mode_t mode, struct fuse_file_info *fi) | ||||
|  { | ||||
| -    int fd; | ||||
| +    int fd = -1; | ||||
|      struct lo_data *lo = lo_data(req); | ||||
|      struct lo_inode *parent_inode; | ||||
| +    struct lo_inode *inode = NULL; | ||||
|      struct fuse_entry_param e; | ||||
|      int err; | ||||
|      struct lo_cred old = {}; | ||||
| @@ -1791,36 +1831,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|   | ||||
|      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
| -    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||
| -                mode); | ||||
| +    /* Try to create a new file but don't open existing files */ | ||||
| +    fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); | ||||
|      err = fd == -1 ? errno : 0; | ||||
| -    lo_restore_cred(&old); | ||||
|   | ||||
| -    if (!err) { | ||||
| -        ssize_t fh; | ||||
| +    lo_restore_cred(&old); | ||||
|   | ||||
| -        pthread_mutex_lock(&lo->mutex); | ||||
| -        fh = lo_add_fd_mapping(lo, fd); | ||||
| -        pthread_mutex_unlock(&lo->mutex); | ||||
| -        if (fh == -1) { | ||||
| -            close(fd); | ||||
| -            err = ENOMEM; | ||||
| -            goto out; | ||||
| -        } | ||||
| +    /* Ignore the error if file exists and O_EXCL was not given */ | ||||
| +    if (err && (err != EEXIST || (fi->flags & O_EXCL))) { | ||||
| +        goto out; | ||||
| +    } | ||||
|   | ||||
| -        fi->fh = fh; | ||||
| -        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||
| +    err = lo_do_lookup(req, parent, name, &e, &inode); | ||||
| +    if (err) { | ||||
| +        goto out; | ||||
|      } | ||||
| -    if (lo->cache == CACHE_NONE) { | ||||
| -        fi->direct_io = 1; | ||||
| -    } else if (lo->cache == CACHE_ALWAYS) { | ||||
| -        fi->keep_cache = 1; | ||||
| + | ||||
| +    err = lo_do_open(lo, inode, fd, fi); | ||||
| +    fd = -1; /* lo_do_open() takes ownership of fd */ | ||||
| +    if (err) { | ||||
| +        /* Undo lo_do_lookup() nlookup ref */ | ||||
| +        unref_inode_lolocked(lo, inode, 1); | ||||
|      } | ||||
|   | ||||
|  out: | ||||
| +    lo_inode_put(lo, &inode); | ||||
|      lo_inode_put(lo, &parent_inode); | ||||
|   | ||||
|      if (err) { | ||||
| +        if (fd >= 0) { | ||||
| +            close(fd); | ||||
| +        } | ||||
| + | ||||
|          fuse_reply_err(req, err); | ||||
|      } else { | ||||
|          fuse_reply_create(req, &e, fi); | ||||
| @@ -1834,7 +1876,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||
|                                                        pid_t pid, int *err) | ||||
|  { | ||||
|      struct lo_inode_plock *plock; | ||||
| -    char procname[64]; | ||||
|      int fd; | ||||
|   | ||||
|      plock = | ||||
| @@ -1851,12 +1892,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||
|      } | ||||
|   | ||||
|      /* Open another instance of file which can be used for ofd locks. */ | ||||
| -    sprintf(procname, "%i", inode->fd); | ||||
| - | ||||
|      /* TODO: What if file is not writable? */ | ||||
| -    fd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||
| -    if (fd == -1) { | ||||
| -        *err = errno; | ||||
| +    fd = lo_inode_open(lo, inode, O_RDWR); | ||||
| +    if (fd < 0) { | ||||
| +        *err = -fd; | ||||
|          free(plock); | ||||
|          return NULL; | ||||
|      } | ||||
| @@ -2003,7 +2042,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|          return; | ||||
|      } | ||||
|   | ||||
| -    err = lo_do_open(lo, inode, fi); | ||||
| +    err = lo_do_open(lo, inode, -1, fi); | ||||
|      lo_inode_put(lo, &inode); | ||||
|      if (err) { | ||||
|          fuse_reply_err(req, err); | ||||
| @@ -2059,39 +2098,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|  static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||
|                       struct fuse_file_info *fi) | ||||
|  { | ||||
| +    struct lo_inode *inode = lo_inode(req, ino); | ||||
| +    struct lo_data *lo = lo_data(req); | ||||
|      int res; | ||||
|      int fd; | ||||
| -    char *buf; | ||||
|   | ||||
|      fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, | ||||
|               (void *)fi); | ||||
|   | ||||
| -    if (!fi) { | ||||
| -        struct lo_data *lo = lo_data(req); | ||||
| - | ||||
| -        res = asprintf(&buf, "%i", lo_fd(req, ino)); | ||||
| -        if (res == -1) { | ||||
| -            return (void)fuse_reply_err(req, errno); | ||||
| -        } | ||||
| +    if (!inode) { | ||||
| +        fuse_reply_err(req, EBADF); | ||||
| +        return; | ||||
| +    } | ||||
|   | ||||
| -        fd = openat(lo->proc_self_fd, buf, O_RDWR); | ||||
| -        free(buf); | ||||
| -        if (fd == -1) { | ||||
| -            return (void)fuse_reply_err(req, errno); | ||||
| +    if (!fi) { | ||||
| +        fd = lo_inode_open(lo, inode, O_RDWR); | ||||
| +        if (fd < 0) { | ||||
| +            res = -fd; | ||||
| +            goto out; | ||||
|          } | ||||
|      } else { | ||||
|          fd = lo_fi_fd(req, fi); | ||||
|      } | ||||
|   | ||||
|      if (datasync) { | ||||
| -        res = fdatasync(fd); | ||||
| +        res = fdatasync(fd) == -1 ? errno : 0; | ||||
|      } else { | ||||
| -        res = fsync(fd); | ||||
| +        res = fsync(fd) == -1 ? errno : 0; | ||||
|      } | ||||
|      if (!fi) { | ||||
|          close(fd); | ||||
|      } | ||||
| -    fuse_reply_err(req, res == -1 ? errno : 0); | ||||
| +out: | ||||
| +    lo_inode_put(lo, &inode); | ||||
| +    fuse_reply_err(req, res); | ||||
|  } | ||||
|   | ||||
|  static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,31 @@ | ||||
| From a68a49b83f6c2694a080bb3761f566bd84b7ca26 Mon Sep 17 00:00:00 2001 | ||||
| From: Greg Kurz <groug@kaod.org> | ||||
| Date: Thu, 4 Feb 2021 20:23:16 +0100 | ||||
| Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist | ||||
|  | ||||
| This is how glibc implements lseek(2) on POWER. | ||||
|  | ||||
| BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692 | ||||
| Signed-off-by: Greg Kurz <groug@kaod.org> | ||||
| Message-Id: <20210121171540.1449777-1-groug@kaod.org> | ||||
| Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/seccomp.c | 1 + | ||||
|  1 file changed, 1 insertion(+) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||
| index 7f93fe87..57642ff8 100644 | ||||
| --- a/tools/virtiofsd/seccomp.c | ||||
| +++ b/tools/virtiofsd/seccomp.c | ||||
| @@ -68,6 +68,7 @@ static const int syscall_whitelist[] = { | ||||
|      SCMP_SYS(linkat), | ||||
|      SCMP_SYS(listxattr), | ||||
|      SCMP_SYS(lseek), | ||||
| +    SCMP_SYS(_llseek), /* For POWER */ | ||||
|      SCMP_SYS(madvise), | ||||
|      SCMP_SYS(mkdirat), | ||||
|      SCMP_SYS(mknodat), | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,33 @@ | ||||
| From c013c9a1d796d1feae143f02b3c654f0a42f7055 Mon Sep 17 00:00:00 2001 | ||||
| From: Greg Kurz <groug@kaod.org> | ||||
| Date: Thu, 4 Feb 2021 20:24:28 +0100 | ||||
| Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist | ||||
|  | ||||
| This is how linux restarts some system calls after SIGSTOP/SIGCONT. | ||||
| This is needed to avoid virtiofsd termination when resuming execution | ||||
| under GDB for example. | ||||
|  | ||||
| Signed-off-by: Greg Kurz <groug@kaod.org> | ||||
| Message-Id: <20210201193305.136390-1-groug@kaod.org> | ||||
| Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/seccomp.c | 1 + | ||||
|  1 file changed, 1 insertion(+) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||
| index 57642ff8..004f5026 100644 | ||||
| --- a/tools/virtiofsd/seccomp.c | ||||
| +++ b/tools/virtiofsd/seccomp.c | ||||
| @@ -91,6 +91,7 @@ static const int syscall_whitelist[] = { | ||||
|      SCMP_SYS(renameat), | ||||
|      SCMP_SYS(renameat2), | ||||
|      SCMP_SYS(removexattr), | ||||
| +    SCMP_SYS(restart_syscall), | ||||
|      SCMP_SYS(rt_sigaction), | ||||
|      SCMP_SYS(rt_sigprocmask), | ||||
|      SCMP_SYS(rt_sigreturn), | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,104 @@ | ||||
| From c5c8e946f8200de1c6d917949e98484908b43f21 Mon Sep 17 00:00:00 2001 | ||||
| From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||
| Date: Thu, 4 Feb 2021 22:11:28 +0100 | ||||
| Subject: [PATCH] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options | ||||
|  | ||||
| Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT | ||||
| open flag of guest applications will be discarded by virtiofsd. While | ||||
| this behavior makes it consistent with the virtio-9p scheme when guest | ||||
| applications use direct I/O, we no longer have any chance to bypass the | ||||
| host page cache. | ||||
|  | ||||
| Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o | ||||
|  no_allow_direct_io' option is added, or none of '-o allow_direct_io' or | ||||
|  '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to | ||||
|  0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' | ||||
| is added to the starting command-line, 'allow_direct_io' will be set to | ||||
| 1, so that the O_DIRECT flags will be retained and host page cache can | ||||
| be bypassed. | ||||
|  | ||||
| Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com> | ||||
| Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ | ||||
|  1 file changed, 14 insertions(+), 6 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index 94e0de2d..289c9985 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -151,6 +151,7 @@ struct lo_data { | ||||
|      int timeout_set; | ||||
|      int readdirplus_set; | ||||
|      int readdirplus_clear; | ||||
| +    int allow_direct_io; | ||||
|      struct lo_inode root; | ||||
|      GHashTable *inodes; /* protected by lo->mutex */ | ||||
|      struct lo_map ino_map; /* protected by lo->mutex */ | ||||
| @@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { | ||||
|      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, | ||||
|      { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, | ||||
|      { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, | ||||
| +    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, | ||||
| +    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, | ||||
|      FUSE_OPT_END | ||||
|  }; | ||||
|  static bool use_syslog = false; | ||||
| @@ -1516,7 +1519,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, | ||||
|      fuse_reply_err(req, 0); | ||||
|  } | ||||
|   | ||||
| -static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||
| +static void update_open_flags(int writeback, int allow_direct_io, | ||||
| +                              struct fuse_file_info *fi) | ||||
|  { | ||||
|      /* | ||||
|       * With writeback cache, kernel may send read requests even | ||||
| @@ -1541,10 +1545,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) | ||||
|   | ||||
|      /* | ||||
|       * O_DIRECT in guest should not necessarily mean bypassing page | ||||
| -     * cache on host as well. If somebody needs that behavior, it | ||||
| -     * probably should be a configuration knob in daemon. | ||||
| +     * cache on host as well. Therefore, we discard it by default | ||||
| +     * ('-o no_allow_direct_io'). If somebody needs that behavior, | ||||
| +     * the '-o allow_direct_io' option should be set. | ||||
|       */ | ||||
| -    fi->flags &= ~O_DIRECT; | ||||
| +    if (!allow_direct_io) { | ||||
| +        fi->flags &= ~O_DIRECT; | ||||
| +    } | ||||
|  } | ||||
|   | ||||
|  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
| @@ -1576,7 +1583,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          goto out; | ||||
|      } | ||||
|   | ||||
| -    update_open_flags(lo->writeback, fi); | ||||
| +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
|      fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||
|                  mode); | ||||
| @@ -1786,7 +1793,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||
|               fi->flags); | ||||
|   | ||||
| -    update_open_flags(lo->writeback, fi); | ||||
| +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
|      sprintf(buf, "%i", lo_fd(req, ino)); | ||||
|      fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| @@ -2824,6 +2831,7 @@ int main(int argc, char *argv[]) | ||||
|          .debug = 0, | ||||
|          .writeback = 0, | ||||
|          .posix_lock = 1, | ||||
| +	.allow_direct_io = 0, | ||||
|          .proc_self_fd = -1, | ||||
|      }; | ||||
|      struct lo_map_elem *root_elem; | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,145 @@ | ||||
| From 04600a6b1ad6376e6da955a929196b9c19fa7f4a Mon Sep 17 00:00:00 2001 | ||||
| From: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Date: Thu, 4 Feb 2021 20:16:42 +0100 | ||||
| Subject: [PATCH] virtiofsd: extract lo_do_open() from lo_open() | ||||
|  | ||||
| Both lo_open() and lo_create() have similar code to open a file. Extract | ||||
| a common lo_do_open() function from lo_open() that will be used by | ||||
| lo_create() in a later commit. | ||||
|  | ||||
| Since lo_do_open() does not otherwise need fuse_req_t req, convert | ||||
| lo_add_fd_mapping() to use struct lo_data *lo instead. | ||||
|  | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-Id: <20210204150208.367837-2-stefanha@redhat.com> | ||||
| Reviewed-by: Greg Kurz <groug@kaod.org> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ | ||||
|  1 file changed, 46 insertions(+), 27 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index 289c9985..eaec0c5a 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -442,17 +442,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) | ||||
|  } | ||||
|   | ||||
|  /* Assumes lo->mutex is held */ | ||||
| -static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) | ||||
| +static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) | ||||
|  { | ||||
|      struct lo_map_elem *elem; | ||||
|   | ||||
| -    elem = lo_map_alloc_elem(&lo_data(req)->fd_map); | ||||
| +    elem = lo_map_alloc_elem(&lo->fd_map); | ||||
|      if (!elem) { | ||||
|          return -1; | ||||
|      } | ||||
|   | ||||
|      elem->fd = fd; | ||||
| -    return elem - lo_data(req)->fd_map.elems; | ||||
| +    return elem - lo->fd_map.elems; | ||||
|  } | ||||
|   | ||||
|  /* Assumes lo->mutex is held */ | ||||
| @@ -1554,6 +1554,38 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||
|      } | ||||
|  } | ||||
|   | ||||
| +static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||
| +                      struct fuse_file_info *fi) | ||||
| +{ | ||||
| +    char buf[64]; | ||||
| +    ssize_t fh; | ||||
| +    int fd; | ||||
| + | ||||
| +    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
| + | ||||
| +    sprintf(buf, "%i", inode->fd); | ||||
| +    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| +    if (fd == -1) { | ||||
| +        return errno; | ||||
| +    } | ||||
| + | ||||
| +    pthread_mutex_lock(&lo->mutex); | ||||
| +    fh = lo_add_fd_mapping(lo, fd); | ||||
| +    pthread_mutex_unlock(&lo->mutex); | ||||
| +    if (fh == -1) { | ||||
| +        close(fd); | ||||
| +        return ENOMEM; | ||||
| +    } | ||||
| + | ||||
| +    fi->fh = fh; | ||||
| +    if (lo->cache == CACHE_NONE) { | ||||
| +        fi->direct_io = 1; | ||||
| +    } else if (lo->cache == CACHE_ALWAYS) { | ||||
| +        fi->keep_cache = 1; | ||||
| +    } | ||||
| +    return 0; | ||||
| +} | ||||
| + | ||||
|  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|                        mode_t mode, struct fuse_file_info *fi) | ||||
|  { | ||||
| @@ -1594,7 +1626,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          ssize_t fh; | ||||
|   | ||||
|          pthread_mutex_lock(&lo->mutex); | ||||
| -        fh = lo_add_fd_mapping(req, fd); | ||||
| +        fh = lo_add_fd_mapping(lo, fd); | ||||
|          pthread_mutex_unlock(&lo->mutex); | ||||
|          if (fh == -1) { | ||||
|              close(fd); | ||||
| @@ -1785,38 +1817,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||
|   | ||||
|  static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|  { | ||||
| -    int fd; | ||||
| -    ssize_t fh; | ||||
| -    char buf[64]; | ||||
|      struct lo_data *lo = lo_data(req); | ||||
| +    struct lo_inode *inode = lo_inode(req, ino); | ||||
| +    int err; | ||||
|   | ||||
|      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, | ||||
|               fi->flags); | ||||
|   | ||||
| -    update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
| - | ||||
| -    sprintf(buf, "%i", lo_fd(req, ino)); | ||||
| -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| -    if (fd == -1) { | ||||
| -        return (void)fuse_reply_err(req, errno); | ||||
| -    } | ||||
| - | ||||
| -    pthread_mutex_lock(&lo->mutex); | ||||
| -    fh = lo_add_fd_mapping(req, fd); | ||||
| -    pthread_mutex_unlock(&lo->mutex); | ||||
| -    if (fh == -1) { | ||||
| -        close(fd); | ||||
| -        fuse_reply_err(req, ENOMEM); | ||||
| +    if (!inode) { | ||||
| +        fuse_reply_err(req, EBADF); | ||||
|          return; | ||||
|      } | ||||
|   | ||||
| -    fi->fh = fh; | ||||
| -    if (lo->cache == CACHE_NONE) { | ||||
| -        fi->direct_io = 1; | ||||
| -    } else if (lo->cache == CACHE_ALWAYS) { | ||||
| -        fi->keep_cache = 1; | ||||
| +    err = lo_do_open(lo, inode, fi); | ||||
| +    lo_inode_put(lo, &inode); | ||||
| +    if (err) { | ||||
| +        fuse_reply_err(req, err); | ||||
| +    } else { | ||||
| +        fuse_reply_open(req, fi); | ||||
|      } | ||||
| -    fuse_reply_open(req, fi); | ||||
|  } | ||||
|   | ||||
|  static void lo_release(fuse_req_t req, fuse_ino_t ino, | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,109 @@ | ||||
| From 51b4b251442ade9163c9c9f953e5db00340ab3f1 Mon Sep 17 00:00:00 2001 | ||||
| From: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Date: Thu, 4 Feb 2021 15:02:07 +0000 | ||||
| Subject: [PATCH] virtiofsd: optionally return inode pointer from | ||||
|  lo_do_lookup() | ||||
|  | ||||
| lo_do_lookup() finds an existing inode or allocates a new one. It | ||||
| increments nlookup so that the inode stays alive until the client | ||||
| releases it. | ||||
|  | ||||
| Existing callers don't need the struct lo_inode so the function doesn't | ||||
| return it. Extend the function to optionally return the inode. The next | ||||
| commit will need it. | ||||
|  | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Reviewed-by: Greg Kurz <groug@kaod.org> | ||||
| Message-Id: <20210204150208.367837-3-stefanha@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- | ||||
|  1 file changed, 21 insertions(+), 8 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index eaec0c5a..a874f509 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -752,11 +752,13 @@ static void posix_locks_value_destroy(gpointer data) | ||||
|  } | ||||
|   | ||||
|  /* | ||||
| - * Increments nlookup and caller must release refcount using | ||||
| - * lo_inode_put(&parent). | ||||
| + * Increments nlookup on the inode on success. unref_inode_lolocked() must be | ||||
| + * called eventually to decrement nlookup again. If inodep is non-NULL, the | ||||
| + * inode pointer is stored and the caller must call lo_inode_put(). | ||||
|   */ | ||||
|  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
| -                        struct fuse_entry_param *e) | ||||
| +                        struct fuse_entry_param *e, | ||||
| +                        struct lo_inode **inodep) | ||||
|  { | ||||
|      int newfd; | ||||
|      int res; | ||||
| @@ -765,6 +767,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|      struct lo_inode *inode = NULL; | ||||
|      struct lo_inode *dir = lo_inode(req, parent); | ||||
|   | ||||
| +    if (inodep) { | ||||
| +        *inodep = NULL; | ||||
| +    } | ||||
| + | ||||
|      /* | ||||
|       * name_to_handle_at() and open_by_handle_at() can reach here with fuse | ||||
|       * mount point in guest, but we don't have its inode info in the | ||||
| @@ -825,7 +831,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          pthread_mutex_unlock(&lo->mutex); | ||||
|      } | ||||
|      e->ino = inode->fuse_ino; | ||||
| -    lo_inode_put(lo, &inode); | ||||
| + | ||||
| +    /* Transfer ownership of inode pointer to caller or drop it */ | ||||
| +    if (inodep) { | ||||
| +        *inodep = inode; | ||||
| +    } else { | ||||
| +        lo_inode_put(lo, &inode); | ||||
| +    } | ||||
| + | ||||
|      lo_inode_put(lo, &dir); | ||||
|   | ||||
|      fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent, | ||||
| @@ -860,7 +873,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) | ||||
|          return; | ||||
|      } | ||||
|   | ||||
| -    err = lo_do_lookup(req, parent, name, &e); | ||||
| +    err = lo_do_lookup(req, parent, name, &e, NULL); | ||||
|      if (err) { | ||||
|          fuse_reply_err(req, err); | ||||
|      } else { | ||||
| @@ -968,7 +981,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, | ||||
|          goto out; | ||||
|      } | ||||
|   | ||||
| -    saverr = lo_do_lookup(req, parent, name, &e); | ||||
| +    saverr = lo_do_lookup(req, parent, name, &e, NULL); | ||||
|      if (saverr) { | ||||
|          goto out; | ||||
|      } | ||||
| @@ -1437,7 +1450,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, | ||||
|   | ||||
|          if (plus) { | ||||
|              if (!is_dot_or_dotdot(name)) { | ||||
| -                err = lo_do_lookup(req, ino, name, &e); | ||||
| +                err = lo_do_lookup(req, ino, name, &e, NULL); | ||||
|                  if (err) { | ||||
|                      goto error; | ||||
|                  } | ||||
| @@ -1635,7 +1648,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|          } | ||||
|   | ||||
|          fi->fh = fh; | ||||
| -        err = lo_do_lookup(req, parent, name, &e); | ||||
| +        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||
|      } | ||||
|      if (lo->cache == CACHE_NONE) { | ||||
|          fi->direct_io = 1; | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,298 @@ | ||||
| From 0d3f3e7b142814bbd6cda096cb52891cca983c92 Mon Sep 17 00:00:00 2001 | ||||
| From: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Date: Thu, 4 Feb 2021 20:20:00 +0100 | ||||
| Subject: [PATCH] virtiofsd: prevent opening of special files (CVE-2020-35517) | ||||
|  | ||||
| A well-behaved FUSE client does not attempt to open special files with | ||||
| FUSE_OPEN because they are handled on the client side (e.g. device nodes | ||||
| are handled by client-side device drivers). | ||||
|  | ||||
| The check to prevent virtiofsd from opening special files is missing in | ||||
| a few cases, most notably FUSE_OPEN. A malicious client can cause | ||||
| virtiofsd to open a device node, potentially allowing the guest to | ||||
| escape. This can be exploited by a modified guest device driver. It is | ||||
| not exploitable from guest userspace since the guest kernel will handle | ||||
| special files inside the guest instead of sending FUSE requests. | ||||
|  | ||||
| This patch fixes this issue by introducing the lo_inode_open() function | ||||
| to check the file type before opening it. This is a short-term solution | ||||
| because it does not prevent a compromised virtiofsd process from opening | ||||
| device nodes on the host. | ||||
|  | ||||
| Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT | ||||
| | O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not | ||||
| necessary here. If the file exists and the user did not specify O_EXCL, | ||||
| open it via lo_do_open(). | ||||
|  | ||||
| Reported-by: Alex Xu <alex@alxu.ca> | ||||
| Fixes: CVE-2020-35517 | ||||
| Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| Reviewed-by: Vivek Goyal <vgoyal@redhat.com> | ||||
| Reviewed-by: Greg Kurz <groug@kaod.org> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-Id: <20210204150208.367837-4-stefanha@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- | ||||
|  1 file changed, 92 insertions(+), 52 deletions(-) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c | ||||
| index a874f509..41ff708a 100644 | ||||
| --- a/tools/virtiofsd/passthrough_ll.c | ||||
| +++ b/tools/virtiofsd/passthrough_ll.c | ||||
| @@ -538,6 +538,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) | ||||
|      return fd; | ||||
|  } | ||||
|   | ||||
| +/* | ||||
| + * Open a file descriptor for an inode. Returns -EBADF if the inode is not a | ||||
| + * regular file or a directory. | ||||
| + * | ||||
| + * Use this helper function instead of raw openat(2) to prevent security issues | ||||
| + * when a malicious client opens special files such as block device nodes. | ||||
| + * Symlink inodes are also rejected since symlinks must already have been | ||||
| + * traversed on the client side. | ||||
| + */ | ||||
| +static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, | ||||
| +                         int open_flags) | ||||
| +{ | ||||
| +    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); | ||||
| +    int fd; | ||||
| + | ||||
| +    if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { | ||||
| +        return -EBADF; | ||||
| +    } | ||||
| + | ||||
| +    /* | ||||
| +     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier | ||||
| +     * that the inode is not a special file but if an external process races | ||||
| +     * with us then symlinks are traversed here. It is not possible to escape | ||||
| +     * the shared directory since it is mounted as "/" though. | ||||
| +     */ | ||||
| +    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); | ||||
| +    if (fd < 0) { | ||||
| +        return -errno; | ||||
| +    } | ||||
| +    return fd; | ||||
| +} | ||||
| + | ||||
|  static void lo_init(void *userdata, struct fuse_conn_info *conn) | ||||
|  { | ||||
|      struct lo_data *lo = (struct lo_data *)userdata; | ||||
| @@ -661,9 +693,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, | ||||
|          if (fi) { | ||||
|              truncfd = fd; | ||||
|          } else { | ||||
| -            sprintf(procname, "%i", ifd); | ||||
| -            truncfd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||
| +            truncfd = lo_inode_open(lo, inode, O_RDWR); | ||||
|              if (truncfd < 0) { | ||||
| +                errno = -truncfd; | ||||
|                  goto out_err; | ||||
|              } | ||||
|          } | ||||
| @@ -768,7 +800,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|      struct lo_inode *dir = lo_inode(req, parent); | ||||
|   | ||||
|      if (inodep) { | ||||
| -        *inodep = NULL; | ||||
| +        *inodep = NULL; /* in case there is an error */ | ||||
|      } | ||||
|   | ||||
|      /* | ||||
| @@ -1567,19 +1599,26 @@ static void update_open_flags(int writeback, int allow_direct_io, | ||||
|      } | ||||
|  } | ||||
|   | ||||
| +/* | ||||
| + * Open a regular file, set up an fd mapping, and fill out the struct | ||||
| + * fuse_file_info for it. If existing_fd is not negative, use that fd instead | ||||
| + * opening a new one. Takes ownership of existing_fd. | ||||
| + * | ||||
| + * Returns 0 on success or a positive errno. | ||||
| + */ | ||||
|  static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||
| -                      struct fuse_file_info *fi) | ||||
| +                      int existing_fd, struct fuse_file_info *fi) | ||||
|  { | ||||
| -    char buf[64]; | ||||
|      ssize_t fh; | ||||
| -    int fd; | ||||
| +    int fd = existing_fd; | ||||
|   | ||||
|      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
| -    sprintf(buf, "%i", inode->fd); | ||||
| -    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | ||||
| -    if (fd == -1) { | ||||
| -        return errno; | ||||
| +    if (fd < 0) { | ||||
| +        fd = lo_inode_open(lo, inode, fi->flags); | ||||
| +        if (fd < 0) { | ||||
| +            return -fd; | ||||
| +        } | ||||
|      } | ||||
|   | ||||
|      pthread_mutex_lock(&lo->mutex); | ||||
| @@ -1602,9 +1641,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, | ||||
|  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|                        mode_t mode, struct fuse_file_info *fi) | ||||
|  { | ||||
| -    int fd; | ||||
| +    int fd = -1; | ||||
|      struct lo_data *lo = lo_data(req); | ||||
|      struct lo_inode *parent_inode; | ||||
| +    struct lo_inode *inode = NULL; | ||||
|      struct fuse_entry_param e; | ||||
|      int err; | ||||
|      struct lo_cred old = {}; | ||||
| @@ -1630,36 +1670,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, | ||||
|   | ||||
|      update_open_flags(lo->writeback, lo->allow_direct_io, fi); | ||||
|   | ||||
| -    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, | ||||
| -                mode); | ||||
| +    /* Try to create a new file but don't open existing files */ | ||||
| +    fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); | ||||
|      err = fd == -1 ? errno : 0; | ||||
| -    lo_restore_cred(&old); | ||||
|   | ||||
| -    if (!err) { | ||||
| -        ssize_t fh; | ||||
| +    lo_restore_cred(&old); | ||||
|   | ||||
| -        pthread_mutex_lock(&lo->mutex); | ||||
| -        fh = lo_add_fd_mapping(lo, fd); | ||||
| -        pthread_mutex_unlock(&lo->mutex); | ||||
| -        if (fh == -1) { | ||||
| -            close(fd); | ||||
| -            err = ENOMEM; | ||||
| -            goto out; | ||||
| -        } | ||||
| +    /* Ignore the error if file exists and O_EXCL was not given */ | ||||
| +    if (err && (err != EEXIST || (fi->flags & O_EXCL))) { | ||||
| +        goto out; | ||||
| +    } | ||||
|   | ||||
| -        fi->fh = fh; | ||||
| -        err = lo_do_lookup(req, parent, name, &e, NULL); | ||||
| +    err = lo_do_lookup(req, parent, name, &e, &inode); | ||||
| +    if (err) { | ||||
| +        goto out; | ||||
|      } | ||||
| -    if (lo->cache == CACHE_NONE) { | ||||
| -        fi->direct_io = 1; | ||||
| -    } else if (lo->cache == CACHE_ALWAYS) { | ||||
| -        fi->keep_cache = 1; | ||||
| + | ||||
| +    err = lo_do_open(lo, inode, fd, fi); | ||||
| +    fd = -1; /* lo_do_open() takes ownership of fd */ | ||||
| +    if (err) { | ||||
| +        /* Undo lo_do_lookup() nlookup ref */ | ||||
| +        unref_inode_lolocked(lo, inode, 1); | ||||
|      } | ||||
|   | ||||
|  out: | ||||
| +    lo_inode_put(lo, &inode); | ||||
|      lo_inode_put(lo, &parent_inode); | ||||
|   | ||||
|      if (err) { | ||||
| +        if (fd >= 0) { | ||||
| +            close(fd); | ||||
| +        } | ||||
| + | ||||
|          fuse_reply_err(req, err); | ||||
|      } else { | ||||
|          fuse_reply_create(req, &e, fi); | ||||
| @@ -1673,7 +1715,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||
|                                                        pid_t pid, int *err) | ||||
|  { | ||||
|      struct lo_inode_plock *plock; | ||||
| -    char procname[64]; | ||||
|      int fd; | ||||
|   | ||||
|      plock = | ||||
| @@ -1690,12 +1731,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | ||||
|      } | ||||
|   | ||||
|      /* Open another instance of file which can be used for ofd locks. */ | ||||
| -    sprintf(procname, "%i", inode->fd); | ||||
| - | ||||
|      /* TODO: What if file is not writable? */ | ||||
| -    fd = openat(lo->proc_self_fd, procname, O_RDWR); | ||||
| -    if (fd == -1) { | ||||
| -        *err = errno; | ||||
| +    fd = lo_inode_open(lo, inode, O_RDWR); | ||||
| +    if (fd < 0) { | ||||
| +        *err = -fd; | ||||
|          free(plock); | ||||
|          return NULL; | ||||
|      } | ||||
| @@ -1842,7 +1881,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|          return; | ||||
|      } | ||||
|   | ||||
| -    err = lo_do_open(lo, inode, fi); | ||||
| +    err = lo_do_open(lo, inode, -1, fi); | ||||
|      lo_inode_put(lo, &inode); | ||||
|      if (err) { | ||||
|          fuse_reply_err(req, err); | ||||
| @@ -1898,39 +1937,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||||
|  static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, | ||||
|                       struct fuse_file_info *fi) | ||||
|  { | ||||
| +    struct lo_inode *inode = lo_inode(req, ino); | ||||
| +    struct lo_data *lo = lo_data(req); | ||||
|      int res; | ||||
|      int fd; | ||||
| -    char *buf; | ||||
|   | ||||
|      fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, | ||||
|               (void *)fi); | ||||
|   | ||||
| -    if (!fi) { | ||||
| -        struct lo_data *lo = lo_data(req); | ||||
| - | ||||
| -        res = asprintf(&buf, "%i", lo_fd(req, ino)); | ||||
| -        if (res == -1) { | ||||
| -            return (void)fuse_reply_err(req, errno); | ||||
| -        } | ||||
| +    if (!inode) { | ||||
| +        fuse_reply_err(req, EBADF); | ||||
| +        return; | ||||
| +    } | ||||
|   | ||||
| -        fd = openat(lo->proc_self_fd, buf, O_RDWR); | ||||
| -        free(buf); | ||||
| -        if (fd == -1) { | ||||
| -            return (void)fuse_reply_err(req, errno); | ||||
| +    if (!fi) { | ||||
| +        fd = lo_inode_open(lo, inode, O_RDWR); | ||||
| +        if (fd < 0) { | ||||
| +            res = -fd; | ||||
| +            goto out; | ||||
|          } | ||||
|      } else { | ||||
|          fd = lo_fi_fd(req, fi); | ||||
|      } | ||||
|   | ||||
|      if (datasync) { | ||||
| -        res = fdatasync(fd); | ||||
| +        res = fdatasync(fd) == -1 ? errno : 0; | ||||
|      } else { | ||||
| -        res = fsync(fd); | ||||
| +        res = fsync(fd) == -1 ? errno : 0; | ||||
|      } | ||||
|      if (!fi) { | ||||
|          close(fd); | ||||
|      } | ||||
| -    fuse_reply_err(req, res == -1 ? errno : 0); | ||||
| +out: | ||||
| +    lo_inode_put(lo, &inode); | ||||
| +    fuse_reply_err(req, res); | ||||
|  } | ||||
|   | ||||
|  static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,31 @@ | ||||
| From 7670d5d5061a8d959a367bdd9d86b6b97a3bf4e9 Mon Sep 17 00:00:00 2001 | ||||
| From: Greg Kurz <groug@kaod.org> | ||||
| Date: Thu, 4 Feb 2021 20:23:16 +0100 | ||||
| Subject: [PATCH] virtiofsd: Add _llseek to the seccomp whitelist | ||||
|  | ||||
| This is how glibc implements lseek(2) on POWER. | ||||
|  | ||||
| BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1917692 | ||||
| Signed-off-by: Greg Kurz <groug@kaod.org> | ||||
| Message-Id: <20210121171540.1449777-1-groug@kaod.org> | ||||
| Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/seccomp.c | 1 + | ||||
|  1 file changed, 1 insertion(+) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||
| index 7f93fe87..57642ff8 100644 | ||||
| --- a/tools/virtiofsd/seccomp.c | ||||
| +++ b/tools/virtiofsd/seccomp.c | ||||
| @@ -68,6 +68,7 @@ static const int syscall_whitelist[] = { | ||||
|      SCMP_SYS(linkat), | ||||
|      SCMP_SYS(listxattr), | ||||
|      SCMP_SYS(lseek), | ||||
| +    SCMP_SYS(_llseek), /* For POWER */ | ||||
|      SCMP_SYS(madvise), | ||||
|      SCMP_SYS(mkdirat), | ||||
|      SCMP_SYS(mknodat), | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
| @@ -0,0 +1,33 @@ | ||||
| From 82bc7d7c11ff868ca9b0f663e847f535eab4e3ac Mon Sep 17 00:00:00 2001 | ||||
| From: Greg Kurz <groug@kaod.org> | ||||
| Date: Thu, 4 Feb 2021 20:24:28 +0100 | ||||
| Subject: [PATCH] virtiofsd: Add restart_syscall to the seccomp whitelist | ||||
|  | ||||
| This is how linux restarts some system calls after SIGSTOP/SIGCONT. | ||||
| This is needed to avoid virtiofsd termination when resuming execution | ||||
| under GDB for example. | ||||
|  | ||||
| Signed-off-by: Greg Kurz <groug@kaod.org> | ||||
| Message-Id: <20210201193305.136390-1-groug@kaod.org> | ||||
| Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||||
| --- | ||||
|  tools/virtiofsd/seccomp.c | 1 + | ||||
|  1 file changed, 1 insertion(+) | ||||
|  | ||||
| diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c | ||||
| index 57642ff8..004f5026 100644 | ||||
| --- a/tools/virtiofsd/seccomp.c | ||||
| +++ b/tools/virtiofsd/seccomp.c | ||||
| @@ -91,6 +91,7 @@ static const int syscall_whitelist[] = { | ||||
|      SCMP_SYS(renameat), | ||||
|      SCMP_SYS(renameat2), | ||||
|      SCMP_SYS(removexattr), | ||||
| +    SCMP_SYS(restart_syscall), | ||||
|      SCMP_SYS(rt_sigaction), | ||||
|      SCMP_SYS(rt_sigprocmask), | ||||
|      SCMP_SYS(rt_sigreturn), | ||||
| --  | ||||
| 2.29.2 | ||||
|  | ||||
		Reference in New Issue
	
	Block a user