diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 7690b65d80..7179db8eec 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -305,7 +305,7 @@ parts: ;; *) - cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/ + cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* configs/devices/ ;; esac diff --git a/tools/packaging/qemu/patches/6.1.x/no_patches.txt b/tools/packaging/qemu/patches/6.1.x/no_patches.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-virtiofsd-Allow-to-build-it-without-the-tools.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-virtiofsd-Allow-to-build-it-without-the-tools.patch deleted file mode 100644 index c0036d0079..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-virtiofsd-Allow-to-build-it-without-the-tools.patch +++ /dev/null @@ -1,36 +0,0 @@ -From da5d60ab13c9e31f775b34d7afe6d82fca7f2336 Mon Sep 17 00:00:00 2001 -From: Wainer dos Santos Moschetta -Date: Tue, 2 Feb 2021 13:46:24 -0500 -Subject: [PATCH] virtiofsd: Allow to build it without the tools - -This changed the Meson build script to allow virtiofsd be built even -though the tools build is disabled, thus honoring the --enable-virtiofsd -option. - -(Backport of commit xxxxxx) -Signed-off-by: Wainer dos Santos Moschetta ---- - tools/meson.build | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/tools/meson.build b/tools/meson.build -index fdce66857d..3e5a0abfa2 100644 ---- a/tools/meson.build -+++ b/tools/meson.build -@@ -10,8 +10,11 @@ if get_option('virtiofsd').enabled() - error('virtiofsd requires Linux') - elif not seccomp.found() or not libcap_ng.found() - error('virtiofsd requires libcap-ng-devel and seccomp-devel') -- elif not have_tools or 'CONFIG_VHOST_USER' not in config_host -- error('virtiofsd needs tools and vhost-user support') -+ elif 'CONFIG_VHOST_USER' not in config_host -+ error('virtiofsd needs vhost-user support') -+ else -+ # Disabled all the tools but virtiofsd. -+ have_virtiofsd = true - endif - endif - elif get_option('virtiofsd').disabled() or not have_system --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0002-virtiofsd-add-security-guide-document.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0002-virtiofsd-add-security-guide-document.patch deleted file mode 100644 index c5893642a1..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0002-virtiofsd-add-security-guide-document.patch +++ /dev/null @@ -1,155 +0,0 @@ -From bb506adc3bc3e3c0cad695b3bab126afdc3f0536 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 16 May 2019 15:11:35 +0100 -Subject: [PATCH 02/29] virtiofsd: add security guide document -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Many people want to know: what's up with virtiofsd and security? This -document provides the answers! - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé ---- - docs/tools/index.rst | 1 + - docs/tools/virtiofsd-security.rst | 118 ++++++++++++++++++++++++++++++ - 2 files changed, 119 insertions(+) - create mode 100644 docs/tools/virtiofsd-security.rst - -diff --git a/docs/tools/index.rst b/docs/tools/index.rst -index 3a5829c17a..d5b65f803b 100644 ---- a/docs/tools/index.rst -+++ b/docs/tools/index.rst -@@ -17,3 +17,4 @@ Contents: - qemu-trace-stap - virtfs-proxy-helper - virtiofsd -+ virtiofsd-security -diff --git a/docs/tools/virtiofsd-security.rst b/docs/tools/virtiofsd-security.rst -new file mode 100644 -index 0000000000..61ce551344 ---- /dev/null -+++ b/docs/tools/virtiofsd-security.rst -@@ -0,0 +1,118 @@ -+======================== -+Virtiofsd Security Guide -+======================== -+ -+Introduction -+============ -+This document covers security topics for users of virtiofsd, the daemon that -+implements host<->guest file system sharing. Sharing files between one or more -+guests and the host raises questions about the trust relationships between -+these entities. By understanding these topics users can safely deploy -+virtiofsd and control access to their data. -+ -+Architecture -+============ -+The virtiofsd daemon process acts as a vhost-user device backend, implementing -+the virtio-fs device that the corresponding device driver inside the guest -+interacts with. -+ -+There is one virtiofsd process per virtio-fs device instance. For example, -+when two guests have access to the same shared directory there are still two -+virtiofsd processes since there are two virtio-fs device instances. Similarly, -+if one guest has access to two shared directories, there are two virtiofsd -+processes since there are two virtio-fs device instances. -+ -+Files are created on the host with uid/gid values provided by the guest. -+Furthermore, virtiofsd is unable to enforce file permissions since guests have -+the ability to access any file within the shared directory. File permissions -+are implemented in the guest, just like with traditional local file systems. -+ -+Security Requirements -+===================== -+Guests have root access to the shared directory. This is necessary for root -+file systems on virtio-fs and similar use cases. -+ -+When multiple guests have access to the same shared directory, the guests have -+a trust relationship. A broken or malicious guest could delete or corrupt -+files. It could exploit symlink or time-of-check-to-time-of-use (TOCTOU) race -+conditions against applications in other guests. It could plant device nodes -+or setuid executables to gain privileges in other guests. It could perform -+denial-of-service (DoS) attacks by consuming available space or making the file -+system unavailable to other guests. -+ -+Guests are restricted to the shared directory and cannot access other files on -+the host. -+ -+Guests should not be able to gain arbitrary code execution inside the virtiofsd -+process. If they do, the process is sandboxed to prevent escaping into other -+parts of the host. -+ -+Daemon Sandboxing -+================= -+The virtiofsd process handles virtio-fs FUSE requests from the untrusted guest. -+This attack surface could give the guest access to host resources and must -+therefore be protected. Sandboxing mechanisms are integrated into virtiofsd to -+reduce the impact in the event that an attacker gains control of the process. -+ -+As a general rule, virtiofsd does not trust inputs from the guest, aside from -+uid/gid values. Input validation is performed so that the guest cannot corrupt -+memory or otherwise gain arbitrary code execution in the virtiofsd process. -+ -+Sandboxing adds restrictions on the virtiofsd so that even if an attacker is -+able to exploit a bug, they will be constrained to the virtiofsd process and -+unable to cause damage on the host. -+ -+Seccomp Whitelist -+----------------- -+Many system calls are not required by virtiofsd to perform its function. For -+example, ptrace(2) and execve(2) are not necessary and attackers are likely to -+use them to further compromise the system. This is prevented using a seccomp -+whitelist in virtiofsd. -+ -+During startup virtiofsd installs a whitelist of allowed system calls. All -+other system calls are forbidden for the remaining lifetime of the process. -+This list has been built through experience of running virtiofsd on several -+flavors of Linux and observing which system calls were encountered. -+ -+It is possible that previously unexplored code paths or newer library versions -+will invoke system calls that have not been whitelisted yet. In this case the -+process terminates and a seccomp error is captured in the audit log. The log -+can typically be viewed using ``journalctl -xe`` and searching for ``SECCOMP``. -+ -+Should it be necessary to extend the whitelist, system call numbers from the -+audit log can be translated to names through a CPU architecture-specific -+``.tbl`` file in the Linux source tree. They can then be added to the -+whitelist in ``seccomp.c`` in the virtiofsd source tree. -+ -+Mount Namespace -+--------------- -+During startup virtiofsd enters a new mount namespace and releases all mounts -+except for the shared directory. This makes the file system root `/` the -+shared directory. It is impossible to access files outside the shared -+directory since they cannot be looked up by path resolution. -+ -+Several attacks, including `..` traversal and symlink escapes, are prevented by -+the mount namespace. -+ -+The current virtiofsd implementation keeps a directory file descriptor to -+/proc/self/fd open in order to implement several FUSE requests. This file -+descriptor could be used by attackers to access files outside the shared -+directory. This limitation will be addressed in a future release of virtiofsd. -+ -+Other Namespaces -+---------------- -+Virtiofsd enters new pid and network namespaces during startup. The pid -+namespace prevents the process from seeing other processes running on the host. -+The network namespace removes network connectivity from the process. -+ -+Deployment Best Practices -+========================= -+The shared directory should be a separate file system so that untrusted guests -+cannot cause a denial-of-service by using up all available inodes or exhausting -+free space. -+ -+If the shared directory is also accessible from a host mount namespace, it is -+recommended to keep a parent directory with rwx------ permissions so that other -+users on the host are unable to access any setuid executables or device nodes -+in the shared directory. The `nosuid` and `nodev` mount options can also be -+used to prevent this issue. --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0003-DAX-contrib-libvhost-user-Add-virtio-fs-slave-types.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0003-DAX-contrib-libvhost-user-Add-virtio-fs-slave-types.patch deleted file mode 100644 index 7afe567577..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0003-DAX-contrib-libvhost-user-Add-virtio-fs-slave-types.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 800ce0d08e09320ac2f1bd9125cb07d14a2689fe Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 7 Feb 2019 18:39:31 +0000 -Subject: [PATCH 03/29] DAX contrib/libvhost-user: Add virtio-fs slave types - -Add virtio-fs definitions to libvhost-user - -Signed-off-by: Dr. David Alan Gilbert ---- - subprojects/libvhost-user/libvhost-user.c | 32 +++++++++++++++++++++++ - subprojects/libvhost-user/libvhost-user.h | 31 ++++++++++++++++++++++ - 2 files changed, 63 insertions(+) - -diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c -index 3b1b5c385f..9b8223b5d5 100644 ---- a/subprojects/libvhost-user/libvhost-user.c -+++ b/subprojects/libvhost-user/libvhost-user.c -@@ -2910,3 +2910,35 @@ vu_queue_push(VuDev *dev, VuVirtq *vq, - vu_queue_flush(dev, vq, 1); - vu_queue_inflight_post_put(dev, vq, elem->index); - } -+ -+bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, -+ VhostUserFSSlaveMsg *fsm) -+{ -+ int fd_num = 0; -+ VhostUserMsg vmsg = { -+ .request = req, -+ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, -+ .size = sizeof(vmsg.payload.fs), -+ .payload.fs = *fsm, -+ }; -+ -+ if (fd != -1) { -+ vmsg.fds[fd_num++] = fd; -+ } -+ -+ vmsg.fd_num = fd_num; -+ -+ if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { -+ return false; -+ } -+ -+ pthread_mutex_lock(&dev->slave_mutex); -+ if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { -+ pthread_mutex_unlock(&dev->slave_mutex); -+ return false; -+ } -+ -+ /* Also unlocks the slave_mutex */ -+ return vu_process_message_reply(dev, &vmsg); -+} -+ -diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h -index f0aca2b216..f3b0998eea 100644 ---- a/subprojects/libvhost-user/libvhost-user.h -+++ b/subprojects/libvhost-user/libvhost-user.h -@@ -122,6 +122,24 @@ typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_MAX - } VhostUserSlaveRequest; - -+/* Structures carried over the slave channel back to QEMU */ -+#define VHOST_USER_FS_SLAVE_ENTRIES 8 -+ -+/* For the flags field of VhostUserFSSlaveMsg */ -+#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0) -+#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1) -+ -+typedef struct { -+ /* Offsets within the file being mapped */ -+ uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES]; -+ /* Offsets within the cache */ -+ uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES]; -+ /* Lengths of sections */ -+ uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES]; -+ /* Flags, from VHOST_USER_FS_FLAG_* */ -+ uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES]; -+} VhostUserFSSlaveMsg; -+ - typedef struct VhostUserMemoryRegion { - uint64_t guest_phys_addr; - uint64_t memory_size; -@@ -197,6 +215,7 @@ typedef struct VhostUserMsg { - VhostUserConfig config; - VhostUserVringArea area; - VhostUserInflight inflight; -+ VhostUserFSSlaveMsg fs; - } payload; - - int fds[VHOST_MEMORY_BASELINE_NREGIONS]; -@@ -693,4 +712,16 @@ void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, - bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, - unsigned int out_bytes); - -+/** -+ * vu_fs_cache_request: Send a slave message for an fs client -+ * @dev: a VuDev context -+ * @req: The request type (map, unmap, sync) -+ * @fd: an fd (only required for map, else must be -1) -+ * @fsm: The body of the message -+ * -+ * Returns: true if the reply was 0 -+ */ -+bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, -+ VhostUserFSSlaveMsg *fsm); -+ - #endif /* LIBVHOST_USER_H */ --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0006-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0006-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch deleted file mode 100644 index e60b5a9d1f..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0006-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch +++ /dev/null @@ -1,191 +0,0 @@ -From 27ccc5e4aecbffd590199bae897a8359889fd54d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 4 Jul 2018 18:51:42 +0100 -Subject: [PATCH 06/29] DAX: virtio-fs: Add vhost-user slave commands for - mapping - -The daemon may request that fd's be mapped into the virtio-fs cache -visible to the guest. -These mappings are triggered by commands sent over the slave fd -from the daemon. - -Signed-off-by: Dr. David Alan Gilbert ---- - docs/interop/vhost-user.rst | 23 ++++++++++++++++++++++ - hw/virtio/vhost-user-fs.c | 19 ++++++++++++++++++ - hw/virtio/vhost-user.c | 18 +++++++++++++++++ - include/hw/virtio/vhost-user-fs.h | 24 +++++++++++++++++++++++ - subprojects/libvhost-user/libvhost-user.h | 3 +++ - 5 files changed, 87 insertions(+) - -diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst -index d6085f7045..056f94c6fb 100644 ---- a/docs/interop/vhost-user.rst -+++ b/docs/interop/vhost-user.rst -@@ -1432,6 +1432,29 @@ Slave message types - - The state.num field is currently reserved and must be set to 0. - -+``VHOST_USER_SLAVE_FS_MAP`` -+ :id: 6 -+ :equivalent ioctl: N/A -+ :slave payload: fd + n * (offset + address + len) -+ :master payload: N/A -+ -+ Requests that the QEMU mmap the given fd into the virtio-fs cache; -+ multiple chunks can be mapped in one command. -+ A reply is generated indicating whether mapping succeeded. -+ -+``VHOST_USER_SLAVE_FS_UNMAP`` -+ :id: 7 -+ :equivalent ioctl: N/A -+ :slave payload: n * (address + len) -+ :master payload: N/A -+ -+ Requests that the QEMU un-mmap the given range in the virtio-fs cache; -+ multiple chunks can be unmapped in one command. -+ A reply is generated indicating whether unmapping succeeded. -+ -+``VHOST_USER_SLAVE_FS_SYNC`` -+ [Semantic details TBD] -+ - .. _reply_ack: - - VHOST_USER_PROTOCOL_F_REPLY_ACK -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index d111bf2af3..9c35fdbeab 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -33,6 +33,25 @@ - #define DAX_WINDOW_PROT PROT_NONE - #endif - -+int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -+ int fd) -+{ -+ /* TODO */ -+ return -1; -+} -+ -+int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) -+{ -+ /* TODO */ -+ return -1; -+} -+ -+int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) -+{ -+ /* TODO */ -+ return -1; -+} -+ - static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) - { - VHostUserFS *fs = VHOST_USER_FS(vdev); -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 2fdd5daf74..757dee0d1e 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -12,6 +12,7 @@ - #include "qapi/error.h" - #include "hw/virtio/vhost.h" - #include "hw/virtio/vhost-user.h" -+#include "hw/virtio/vhost-user-fs.h" - #include "hw/virtio/vhost-backend.h" - #include "hw/virtio/virtio.h" - #include "hw/virtio/virtio-net.h" -@@ -132,6 +133,11 @@ typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, -+ VHOST_USER_SLAVE_VRING_CALL = 4, -+ VHOST_USER_SLAVE_VRING_ERR = 5, -+ VHOST_USER_SLAVE_FS_MAP = 6, -+ VHOST_USER_SLAVE_FS_UNMAP = 7, -+ VHOST_USER_SLAVE_FS_SYNC = 8, - VHOST_USER_SLAVE_MAX - } VhostUserSlaveRequest; - -@@ -218,6 +224,7 @@ typedef union { - VhostUserCryptoSession session; - VhostUserVringArea area; - VhostUserInflight inflight; -+ VhostUserFSSlaveMsg fs; - } VhostUserPayload; - - typedef struct VhostUserMsg { -@@ -1470,6 +1477,17 @@ static void slave_read(void *opaque) - ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, - fd[0]); - break; -+#ifdef CONFIG_VHOST_USER_FS -+ case VHOST_USER_SLAVE_FS_MAP: -+ ret = vhost_user_fs_slave_map(dev, &payload.fs, fd[0]); -+ break; -+ case VHOST_USER_SLAVE_FS_UNMAP: -+ ret = vhost_user_fs_slave_unmap(dev, &payload.fs); -+ break; -+ case VHOST_USER_SLAVE_FS_SYNC: -+ ret = vhost_user_fs_slave_sync(dev, &payload.fs); -+ break; -+#endif - default: - error_report("Received unexpected msg type: %d.", hdr.request); - ret = -EINVAL; -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index df6bf2a926..69cc6340ed 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -23,6 +23,24 @@ - #define TYPE_VHOST_USER_FS "vhost-user-fs-device" - OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS) - -+/* Structures carried over the slave channel back to QEMU */ -+#define VHOST_USER_FS_SLAVE_ENTRIES 8 -+ -+/* For the flags field of VhostUserFSSlaveMsg */ -+#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0) -+#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1) -+ -+typedef struct { -+ /* Offsets within the file being mapped */ -+ uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES]; -+ /* Offsets within the cache */ -+ uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES]; -+ /* Lengths of sections */ -+ uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES]; -+ /* Flags, from VHOST_USER_FS_FLAG_* */ -+ uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES]; -+} VhostUserFSSlaveMsg; -+ - typedef struct { - CharBackend chardev; - char *tag; -@@ -45,4 +63,10 @@ struct VHostUserFS { - MemoryRegion cache; - }; - -+/* Callbacks from the vhost-user code for slave commands */ -+int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -+ int fd); -+int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); -+int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); -+ - #endif /* _QEMU_VHOST_USER_FS_H */ -diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h -index f3b0998eea..c63a590069 100644 ---- a/subprojects/libvhost-user/libvhost-user.h -+++ b/subprojects/libvhost-user/libvhost-user.h -@@ -119,6 +119,9 @@ typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, - VHOST_USER_SLAVE_VRING_CALL = 4, - VHOST_USER_SLAVE_VRING_ERR = 5, -+ VHOST_USER_SLAVE_FS_MAP = 6, -+ VHOST_USER_SLAVE_FS_UNMAP = 7, -+ VHOST_USER_SLAVE_FS_SYNC = 8, - VHOST_USER_SLAVE_MAX - } VhostUserSlaveRequest; - --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch deleted file mode 100644 index 8f75dd706e..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 3de89ce9fb5eda46f7cefd70e9090cb7cd7ec803 Mon Sep 17 00:00:00 2001 -From: Yang Zhong -Date: Wed, 28 Mar 2018 20:14:53 +0800 -Subject: [PATCH 1/2] 9p: removing coroutines of 9p to increase the I/O - performance - -This is a quick workaround, need to be fixed. - -Signed-off-by: Chao Peng ---- - hw/9pfs/9p.c | 12 +++++------- - hw/9pfs/9p.h | 6 +++--- - hw/9pfs/coth.h | 3 +++ - 3 files changed, 11 insertions(+), 10 deletions(-) - -diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c -index 9e046f7acb..11c8ee08d9 100644 ---- a/hw/9pfs/9p.c -+++ b/hw/9pfs/9p.c -@@ -1082,10 +1082,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) - out_notify: - pdu->s->transport->push_and_notify(pdu); - -- /* Now wakeup anybody waiting in flush for this request */ -- if (!qemu_co_queue_next(&pdu->complete)) { -- pdu_free(pdu); -- } -+ pdu_free(pdu); - } - - static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) -@@ -3997,7 +3994,7 @@ static inline bool is_read_only_op(V9fsPDU *pdu) - - void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) - { -- Coroutine *co; -+// Coroutine *co; - CoroutineEntry *handler; - V9fsState *s = pdu->s; - -@@ -4015,8 +4012,9 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) - } - - qemu_co_queue_init(&pdu->complete); -- co = qemu_coroutine_create(handler, pdu); -- qemu_coroutine_enter(co); -+ handler(pdu); -+ //co = qemu_coroutine_create(handler, pdu); -+ //qemu_coroutine_enter(co); - } - - /* Returns 0 on success, 1 on failure. */ -diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h -index b8f72a3bd9..d16bf9d05e 100644 ---- a/hw/9pfs/9p.h -+++ b/hw/9pfs/9p.h -@@ -391,21 +391,21 @@ extern int total_open_fd; - static inline void v9fs_path_write_lock(V9fsState *s) - { - if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { -- qemu_co_rwlock_wrlock(&s->rename_lock); -+ // qemu_co_rwlock_wrlock(&s->rename_lock); - } - } - - static inline void v9fs_path_read_lock(V9fsState *s) - { - if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { -- qemu_co_rwlock_rdlock(&s->rename_lock); -+ // qemu_co_rwlock_rdlock(&s->rename_lock); - } - } - - static inline void v9fs_path_unlock(V9fsState *s) - { - if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { -- qemu_co_rwlock_unlock(&s->rename_lock); -+ // qemu_co_rwlock_unlock(&s->rename_lock); - } - } - -diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h -index c2cdc7a9ea..0fe971d1f5 100644 ---- a/hw/9pfs/coth.h -+++ b/hw/9pfs/coth.h -@@ -46,6 +46,9 @@ - qemu_coroutine_yield(); \ - } while (0) - -+#undef v9fs_co_run_in_worker -+#define v9fs_co_run_in_worker(code_block) do {code_block} while(0); -+ - void co_run_in_worker_bh(void *); - int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *); - int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **); --- -2.21.0 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch deleted file mode 100644 index 0bb6e10a90..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch +++ /dev/null @@ -1,196 +0,0 @@ -From a0d09868a25b9b15b8ef49402b035597ef889f85 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 4 Jul 2018 20:01:51 +0100 -Subject: [PATCH 07/29] DAX: virtio-fs: Fill in slave commands for mapping - -Fill in definitions for map, unmap and sync commands. - -Signed-off-by: Dr. David Alan Gilbert -with fix by misono.tomohiro@fujitsu.com ---- - hw/virtio/vhost-user-fs.c | 161 ++++++++++++++++++++++++++++++++++++-- - 1 file changed, 155 insertions(+), 6 deletions(-) - -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index 9c35fdbeab..98cec993f7 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -36,20 +36,169 @@ - int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - int fd) - { -- /* TODO */ -- return -1; -+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); -+ if (!fs) { -+ /* Shouldn't happen - but seen on error path */ -+ fprintf(stderr, "%s: Bad fs ptr\n", __func__); -+ return -1; -+ } -+ size_t cache_size = fs->conf.cache_size; -+ if (!cache_size) { -+ fprintf(stderr, "%s: map when DAX cache not present\n", __func__); -+ return -1; -+ } -+ void *cache_host = memory_region_get_ram_ptr(&fs->cache); -+ -+ unsigned int i; -+ int res = 0; -+ -+ if (fd < 0) { -+ fprintf(stderr, "%s: Bad fd for map\n", __func__); -+ return -1; -+ } -+ -+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { -+ if (sm->len[i] == 0) { -+ continue; -+ } -+ -+ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] || -+ (sm->c_offset[i] + sm->len[i]) > cache_size) { -+ fprintf(stderr, "%s: Bad offset/len for map [%d] %" -+ PRIx64 "+%" PRIx64 "\n", __func__, -+ i, sm->c_offset[i], sm->len[i]); -+ res = -1; -+ break; -+ } -+ -+ if (mmap(cache_host + sm->c_offset[i], sm->len[i], -+ ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_R) ? PROT_READ : 0) | -+ ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W) ? PROT_WRITE : 0), -+ MAP_SHARED | MAP_FIXED, -+ fd, sm->fd_offset[i]) != (cache_host + sm->c_offset[i])) { -+ fprintf(stderr, "%s: map failed err %d [%d] %" -+ PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, -+ errno, i, sm->c_offset[i], sm->len[i], -+ sm->fd_offset[i]); -+ res = -1; -+ break; -+ } -+ } -+ -+ if (res) { -+ /* Something went wrong, unmap them all */ -+ vhost_user_fs_slave_unmap(dev, sm); -+ } -+ return res; - } - - int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) - { -- /* TODO */ -- return -1; -+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); -+ if (!fs) { -+ /* Shouldn't happen - but seen on error path */ -+ fprintf(stderr, "%s: Bad fs ptr\n", __func__); -+ return -1; -+ } -+ size_t cache_size = fs->conf.cache_size; -+ if (!cache_size) { -+ /* -+ * Since dax cache is disabled, there should be no unmap request. -+ * Howerver we still receives whole range unmap request during umount -+ * for cleanup. Ignore it. -+ */ -+ if (sm->len[0] == ~(uint64_t)0) { -+ return 0; -+ } -+ -+ fprintf(stderr, "%s: unmap when DAX cache not present\n", __func__); -+ return -1; -+ } -+ void *cache_host = memory_region_get_ram_ptr(&fs->cache); -+ -+ unsigned int i; -+ int res = 0; -+ -+ /* -+ * Note even if one unmap fails we try the rest, since the effect -+ * is to clean up as much as possible. -+ */ -+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { -+ void *ptr; -+ if (sm->len[i] == 0) { -+ continue; -+ } -+ -+ if (sm->len[i] == ~(uint64_t)0) { -+ /* Special case meaning the whole arena */ -+ sm->len[i] = cache_size; -+ } -+ -+ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] || -+ (sm->c_offset[i] + sm->len[i]) > cache_size) { -+ fprintf(stderr, "%s: Bad offset/len for unmap [%d] %" -+ PRIx64 "+%" PRIx64 "\n", __func__, -+ i, sm->c_offset[i], sm->len[i]); -+ res = -1; -+ continue; -+ } -+ -+ ptr = mmap(cache_host + sm->c_offset[i], sm->len[i], DAX_WINDOW_PROT, -+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); -+ if (ptr != (cache_host + sm->c_offset[i])) { -+ fprintf(stderr, "%s: mmap failed (%s) [%d] %" -+ PRIx64 "+%" PRIx64 " from %" PRIx64 " res: %p\n", -+ __func__, -+ strerror(errno), -+ i, sm->c_offset[i], sm->len[i], -+ sm->fd_offset[i], ptr); -+ res = -1; -+ } -+ } -+ -+ return res; - } - - int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) - { -- /* TODO */ -- return -1; -+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); -+ size_t cache_size = fs->conf.cache_size; -+ if (!cache_size) { -+ fprintf(stderr, "%s: sync when DAX cache not present\n", __func__); -+ return -1; -+ } -+ void *cache_host = memory_region_get_ram_ptr(&fs->cache); -+ -+ unsigned int i; -+ int res = 0; -+ -+ /* Note even if one sync fails we try the rest */ -+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { -+ if (sm->len[i] == 0) { -+ continue; -+ } -+ -+ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] || -+ (sm->c_offset[i] + sm->len[i]) > cache_size) { -+ fprintf(stderr, "%s: Bad offset/len for sync [%d] %" -+ PRIx64 "+%" PRIx64 "\n", __func__, -+ i, sm->c_offset[i], sm->len[i]); -+ res = -1; -+ continue; -+ } -+ -+ if (msync(cache_host + sm->c_offset[i], sm->len[i], -+ MS_SYNC /* ?? */)) { -+ fprintf(stderr, "%s: msync failed (%s) [%d] %" -+ PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, -+ strerror(errno), -+ i, sm->c_offset[i], sm->len[i], -+ sm->fd_offset[i]); -+ res = -1; -+ } -+ } -+ -+ return res; - } - - static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0008-DAX-virtiofsd-Add-cache-accessor-functions.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0008-DAX-virtiofsd-Add-cache-accessor-functions.patch deleted file mode 100644 index 1765a2dd4f..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0008-DAX-virtiofsd-Add-cache-accessor-functions.patch +++ /dev/null @@ -1,99 +0,0 @@ -From b341b9541023b0a9f0a315ef24e81522b273e552 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 5 Jul 2018 18:20:34 +0100 -Subject: [PATCH 08/29] DAX: virtiofsd Add cache accessor functions - -Add low level functions that the clients can use to map/unmap/sync cache -areas. - -Signed-off-by: Dr. David Alan Gilbert ---- - tools/virtiofsd/fuse_lowlevel.h | 31 +++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.c | 27 +++++++++++++++++++++++++++ - 2 files changed, 58 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 0e10a14bc9..b0d111bcb2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -29,6 +29,8 @@ - #include - #include - -+#include "subprojects/libvhost-user/libvhost-user.h" -+ - /* - * Miscellaneous definitions - */ -@@ -1970,4 +1972,33 @@ void fuse_session_process_buf(struct fuse_session *se, - */ - int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); - -+/** -+ * For use with virtio-fs; request an fd be mapped into the cache -+ * -+ * @param req The request that triggered this action -+ * @param msg A set of mapping requests -+ * @param fd The fd to map -+ * @return Zero on success -+ */ -+int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); -+ -+/** -+ * For use with virtio-fs; request unmapping of part of the cache -+ * -+ * @param se The session this request is on -+ * @param msg A set of unmapping requests -+ * @return Zero on success -+ */ -+int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); -+ -+/** -+ * For use with virtio-fs; request synchronisation of part of the cache -+ * [Semantics TBD] -+ * -+ * @param req The request that triggered this action -+ * @param msg A set of syncing requests -+ * @return Zero on success -+ */ -+int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); -+ - #endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index bd19358437..24d9323665 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -1044,3 +1044,30 @@ void virtio_session_close(struct fuse_session *se) - free(se->virtio_dev); - se->virtio_dev = NULL; - } -+ -+int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd) -+{ -+ if (!req->se->virtio_dev) { -+ return -ENODEV; -+ } -+ return !vu_fs_cache_request(&req->se->virtio_dev->dev, -+ VHOST_USER_SLAVE_FS_MAP, fd, msg); -+} -+ -+int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) -+{ -+ if (!se->virtio_dev) { -+ return -ENODEV; -+ } -+ return !vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, -+ -1, msg); -+} -+ -+int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) -+{ -+ if (!req->se->virtio_dev) { -+ return -ENODEV; -+ } -+ return !vu_fs_cache_request(&req->se->virtio_dev->dev, -+ VHOST_USER_SLAVE_FS_SYNC, -1, msg); -+} --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0011-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0011-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch deleted file mode 100644 index 55833230aa..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0011-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 15fb0e84e38c2681e855e69b58414ba831b399bf Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 9 Jul 2018 19:57:16 +0100 -Subject: [PATCH 11/29] DAX: virtiofsd: Wire up passthrough_ll's - lo_setupmapping - -Wire up passthrough_ll's setupmapping to allocate, send to virtio -and then reply OK. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Vivek Goyal ---- - tools/virtiofsd/passthrough_ll.c | 24 ++++++++++++++++++++++-- - 1 file changed, 22 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 784bdcff34..b57cb4079e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2895,8 +2895,28 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, - uint64_t len, uint64_t moffset, uint64_t flags, - struct fuse_file_info *fi) - { -- // TODO -- fuse_reply_err(req, ENOSYS); -+ int ret = 0; -+ VhostUserFSSlaveMsg msg = { 0 }; -+ uint64_t vhu_flags; -+ bool writable = flags & O_RDWR; -+ -+ vhu_flags = VHOST_USER_FS_FLAG_MAP_R; -+ if (writable) { -+ vhu_flags |= VHOST_USER_FS_FLAG_MAP_W; -+ } -+ -+ msg.fd_offset[0] = foffset; -+ msg.len[0] = len; -+ msg.c_offset[0] = moffset; -+ msg.flags[0] = vhu_flags; -+ -+ if (fuse_virtio_map(req, &msg, lo_fi_fd(req, fi))) { -+ fprintf(stderr, "%s: map over virtio failed (fd=%d)\n", __func__, -+ (int)fi->fh); -+ ret = EINVAL; -+ } -+ -+ fuse_reply_err(req, ret); - } - - static void lo_removemapping(fuse_req_t req, struct fuse_session *se, --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0012-DAX-virtiofsd-Make-lo_removemapping-work.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0012-DAX-virtiofsd-Make-lo_removemapping-work.patch deleted file mode 100644 index 59281423f3..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0012-DAX-virtiofsd-Make-lo_removemapping-work.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 17cf13d652885b2c3a09fbbab1cb503f53c27d96 Mon Sep 17 00:00:00 2001 -From: Vivek Goyal -Date: Mon, 13 Aug 2018 11:52:43 -0400 -Subject: [PATCH 12/29] DAX: virtiofsd: Make lo_removemapping() work - -Let guest pass in the offset in dax window a mapping is currently -mapped at and needs to be removed. - -Signed-off-by: Vivek Goyal ---- - tools/virtiofsd/passthrough_ll.c | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b57cb4079e..056b395574 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2923,8 +2923,20 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se, - fuse_ino_t ino, unsigned num, - struct fuse_removemapping_one *argp) - { -- // TODO -- fuse_reply_err(req, ENOSYS); -+ VhostUserFSSlaveMsg msg = { 0 }; -+ int ret = 0; -+ -+ msg.len[0] = argp->len; -+ msg.c_offset[0] = argp->moffset; -+ if (fuse_virtio_unmap(se, &msg)) { -+ fprintf(stderr, -+ "%s: unmap over virtio failed " -+ "(offset=0x%lx, len=0x%lx)\n", -+ __func__, argp->moffset, argp->len); -+ ret = EINVAL; -+ } -+ -+ fuse_reply_err(req, ret); - } - - static struct fuse_lowlevel_ops lo_oper = { --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch deleted file mode 100644 index e08fb14857..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 72bccc497aeb9057e36477c327e0ac58bc154e6f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 30 Nov 2018 11:50:25 +0000 -Subject: [PATCH 15/29] DAX: virtiofsd: Perform an unmap on destroy - -Force unmap all remaining dax cache entries on a destroy. - -Signed-off-by: Dr. David Alan Gilbert ---- - tools/virtiofsd/passthrough_ll.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0d3cda8d2f..56a4b9404a 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2875,6 +2875,17 @@ static void lo_destroy(void *userdata, struct fuse_session *se) - { - struct lo_data *lo = (struct lo_data *)userdata; - -+ if (fuse_lowlevel_is_virtio(se)) { -+ VhostUserFSSlaveMsg msg = { 0 }; -+ -+ msg.len[0] = ~(uint64_t)0; /* Special: means 'all' */ -+ msg.c_offset[0] = 0; -+ if (fuse_virtio_unmap(se, &msg)) { -+ fuse_log(FUSE_LOG_ERR, "%s: unmap during destroy failed\n", -+ __func__); -+ } -+ } -+ - pthread_mutex_lock(&lo->mutex); - while (true) { - GHashTableIter iter; --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0016-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0016-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch deleted file mode 100644 index 80d60b8a05..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0016-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch +++ /dev/null @@ -1,34 +0,0 @@ -From c05795e129152533d66f131dd019ae903d1eb39a Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 2 May 2019 18:04:04 +0100 -Subject: [PATCH 16/29] DAX: libvhost-user: Allow popping a queue element with - bad pointers - -Allow a daemon implemented with libvhost-user to accept an -element with pointers to memory that aren't in the mapping table. -The daemon might have some special way to deal with some special -cases of this. - -The default behaviour doesn't change. - -Signed-off-by: Dr. David Alan Gilbert ---- - block/export/vhost-user-blk-server.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index ab2c4d44c4..ea2d302e33 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -205,7 +205,7 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) - while (1) { - VuBlkReq *req; - -- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq)); -+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq), NULL, NULL); - if (!req) { - break; - } --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0017-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0017-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch deleted file mode 100644 index 3b843ae172..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0017-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch +++ /dev/null @@ -1,211 +0,0 @@ -From a238faf5a53668aac037f7ce026d1bf785ee4186 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 20 May 2019 11:54:02 +0100 -Subject: [PATCH 17/29] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO - -Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a -client to ask qemu to perform a read/write from an fd directly -to GPA. - -Signed-off-by: Dr. David Alan Gilbert ---- - docs/interop/vhost-user.rst | 11 +++ - hw/virtio/trace-events | 6 ++ - hw/virtio/vhost-user-fs.c | 87 +++++++++++++++++++++++ - hw/virtio/vhost-user.c | 4 ++ - include/hw/virtio/vhost-user-fs.h | 1 + - subprojects/libvhost-user/libvhost-user.h | 1 + - 6 files changed, 110 insertions(+) - -diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst -index 056f94c6fb..8d6ec92881 100644 ---- a/docs/interop/vhost-user.rst -+++ b/docs/interop/vhost-user.rst -@@ -1455,6 +1455,17 @@ Slave message types - ``VHOST_USER_SLAVE_FS_SYNC`` - [Semantic details TBD] - -+``VHOST_USER_SLAVE_FS_IO`` -+ :id: 9 -+ :equivalent ioctl: N/A -+ :slave payload: fd + n * (offset + address + len) -+ :master payload: N/A -+ -+ Requests that the QEMU performs IO directly from an fd to guest memory -+ on behalf of the daemon; this is normally for a case where a memory region -+ isn't visible to the daemon. -+ [Semantic details TBD] -+ - .. _reply_ack: - - VHOST_USER_PROTOCOL_F_REPLY_ACK -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 2060a144a2..a35adf5caf 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_set_owner(void *dev) "dev: %p" - vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 - -+# vhost-user-fs.c -+ -+vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd" -+vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd" -+vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd" -+ - # virtio.c - virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" - virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index 98cec993f7..82a32492a7 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -22,6 +22,8 @@ - #include "qemu/error-report.h" - #include "hw/virtio/vhost-user-fs.h" - #include "monitor/monitor.h" -+#include "exec/address-spaces.h" -+#include "trace.h" - - /* - * The powerpc kernel code expects the memory to be accessible during -@@ -201,6 +203,91 @@ int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) - return res; - } - -+int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -+ int fd) -+{ -+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); -+ if (!fs) { -+ /* Shouldn't happen - but seen it in error paths */ -+ fprintf(stderr, "%s: Bad fs ptr\n", __func__); -+ return -1; -+ } -+ -+ unsigned int i; -+ int res = 0; -+ size_t done = 0; -+ -+ if (fd < 0) { -+ fprintf(stderr, "%s: Bad fd for map\n", __func__); -+ return -1; -+ } -+ -+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES && !res; i++) { -+ if (sm->len[i] == 0) { -+ continue; -+ } -+ -+ size_t len = sm->len[i]; -+ hwaddr gpa = sm->c_offset[i]; -+ -+ while (len && !res) { -+ MemoryRegionSection mrs = memory_region_find(get_system_memory(), -+ gpa, len); -+ size_t mrs_size = (size_t)int128_get64(mrs.size); -+ -+ if (!mrs_size) { -+ fprintf(stderr, -+ "%s: No guest region found for 0x%" HWADDR_PRIx "\n", -+ __func__, gpa); -+ res = -EFAULT; -+ break; -+ } -+ -+ trace_vhost_user_fs_slave_io_loop(mrs.mr->name, -+ (uint64_t)mrs.offset_within_region, -+ memory_region_is_ram(mrs.mr), -+ memory_region_is_romd(mrs.mr), -+ (size_t)mrs_size); -+ -+ void *hostptr = qemu_map_ram_ptr(mrs.mr->ram_block, -+ mrs.offset_within_region); -+ ssize_t transferred; -+ if (sm->flags[i] & VHOST_USER_FS_FLAG_MAP_R) { -+ /* Read from file into RAM */ -+ if (mrs.mr->readonly) { -+ res = -EFAULT; -+ break; -+ } -+ transferred = pread(fd, hostptr, mrs_size, sm->fd_offset[i]); -+ } else { -+ /* Write into file from RAM */ -+ assert((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W)); -+ transferred = pwrite(fd, hostptr, mrs_size, sm->fd_offset[i]); -+ } -+ trace_vhost_user_fs_slave_io_loop_res(transferred); -+ if (transferred < 0) { -+ res = -errno; -+ break; -+ } -+ if (!transferred) { -+ /* EOF */ -+ break; -+ } -+ -+ done += transferred; -+ len -= transferred; -+ } -+ } -+ close(fd); -+ -+ trace_vhost_user_fs_slave_io_exit(res, done); -+ /* -+ * TODO! We should be returning 'done' if possible but our error handling -+ * doesn't know about that yet. -+ */ -+ return res; -+} -+ - static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) - { - VHostUserFS *fs = VHOST_USER_FS(vdev); -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 757dee0d1e..b4ef0102ad 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_FS_MAP = 6, - VHOST_USER_SLAVE_FS_UNMAP = 7, - VHOST_USER_SLAVE_FS_SYNC = 8, -+ VHOST_USER_SLAVE_FS_IO = 9, - VHOST_USER_SLAVE_MAX - } VhostUserSlaveRequest; - -@@ -1487,6 +1488,9 @@ static void slave_read(void *opaque) - case VHOST_USER_SLAVE_FS_SYNC: - ret = vhost_user_fs_slave_sync(dev, &payload.fs); - break; -+ case VHOST_USER_SLAVE_FS_IO: -+ ret = vhost_user_fs_slave_io(dev, &payload.fs, fd[0]); -+ break; - #endif - default: - error_report("Received unexpected msg type: %d.", hdr.request); -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 69cc6340ed..0750687463 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -68,5 +68,6 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - int fd); - int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); - int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); -+int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, int fd); - - #endif /* _QEMU_VHOST_USER_FS_H */ -diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h -index c63a590069..4b6e681a3e 100644 ---- a/subprojects/libvhost-user/libvhost-user.h -+++ b/subprojects/libvhost-user/libvhost-user.h -@@ -122,6 +122,7 @@ typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_FS_MAP = 6, - VHOST_USER_SLAVE_FS_UNMAP = 7, - VHOST_USER_SLAVE_FS_SYNC = 8, -+ VHOST_USER_SLAVE_FS_IO = 9, - VHOST_USER_SLAVE_MAX - } VhostUserSlaveRequest; - --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0020-DAX-unmap-virtiofsd-Route-unmappable-reads.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0020-DAX-unmap-virtiofsd-Route-unmappable-reads.patch deleted file mode 100644 index 40b429488e..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0020-DAX-unmap-virtiofsd-Route-unmappable-reads.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1f6a9f8567bdf2be00d217abac33a71248541a4a Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 20 May 2019 13:26:51 +0100 -Subject: [PATCH 20/29] DAX/unmap virtiofsd: Route unmappable reads - -When a read with unmappable buffers is found, map it to a slave -read command. - -Signed-off-by: Dr. David Alan Gilbert ---- - tools/virtiofsd/fuse_virtio.c | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 31f17ab043..1f4c7fff35 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -397,6 +397,35 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - in_sg_left -= ret; - len -= ret; - } while (in_sg_left); -+ -+ if (bad_in_num) { -+ while (len && bad_in_num) { -+ VhostUserFSSlaveMsg msg = { 0 }; -+ msg.flags[0] = VHOST_USER_FS_FLAG_MAP_R; -+ msg.fd_offset[0] = buf->buf[0].pos; -+ msg.c_offset[0] = (uint64_t)(uintptr_t)in_sg_ptr[0].iov_base; -+ msg.len[0] = in_sg_ptr[0].iov_len; -+ if (len < msg.len[0]) { -+ msg.len[0] = len; -+ } -+ bool req_res = !fuse_virtio_io(se, &msg, buf->buf[0].fd); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: bad loop; len=%zd bad_in_num=%d fd_offset=%zd " -+ "c_offset=%p req_res=%d\n", -+ __func__, len, bad_in_num, buf->buf[0].pos, -+ in_sg_ptr[0].iov_base, req_res); -+ if (req_res) { -+ len -= msg.len[0]; -+ buf->buf[0].pos += msg.len[0]; -+ in_sg_ptr++; -+ bad_in_num--; -+ } else { -+ ret = EIO; -+ free(in_sg_cpy); -+ goto err; -+ } -+ } -+ } - free(in_sg_cpy); - - /* Need to fix out->len on EOF */ --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0022-DAX-vhost-user-Rework-slave-return-values.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0022-DAX-vhost-user-Rework-slave-return-values.patch deleted file mode 100644 index b88bcb4a60..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0022-DAX-vhost-user-Rework-slave-return-values.patch +++ /dev/null @@ -1,350 +0,0 @@ -From 2a64df420827ff0b127a30f2ac877a7b1ded925b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 20 May 2019 18:08:41 +0100 -Subject: [PATCH 22/29] DAX: vhost-user: Rework slave return values - -All the current slave handlers on the qemu side generate an 'int' -return value that's squashed down to a bool (!!ret) and stuffed into -a uint64_t (field of a union) to be returned. - -Move the uint64_t type back up through the individual handlers so -that we can mkae one actually return a full uint64_t. - -Note that the definition in the interop spec says most of these -cases are defined as returning 0 on success and non-0 for failure, -so it's OK to change from a bool to another non-0. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/virtio/vhost-backend.c | 4 +-- - hw/virtio/vhost-user-fs.c | 42 ++++++++++++++++--------------- - hw/virtio/vhost-user.c | 32 ++++++++++++----------- - include/hw/virtio/vhost-backend.h | 2 +- - include/hw/virtio/vhost-user-fs.h | 13 ++++++---- - 5 files changed, 50 insertions(+), 43 deletions(-) - -diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c -index 222bbcc62d..e81083ddda 100644 ---- a/hw/virtio/vhost-backend.c -+++ b/hw/virtio/vhost-backend.c -@@ -401,7 +401,7 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, - return -ENODEV; - } - --int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, -+uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, - struct vhost_iotlb_msg *imsg) - { - int ret = 0; -@@ -424,5 +424,5 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, - break; - } - -- return ret; -+ return !!ret; - } -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index 82a32492a7..c02dcaeca7 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -35,19 +35,19 @@ - #define DAX_WINDOW_PROT PROT_NONE - #endif - --int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -- int fd) -+uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -+ int fd) - { - VHostUserFS *fs = VHOST_USER_FS(dev->vdev); - if (!fs) { - /* Shouldn't happen - but seen on error path */ - fprintf(stderr, "%s: Bad fs ptr\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - size_t cache_size = fs->conf.cache_size; - if (!cache_size) { - fprintf(stderr, "%s: map when DAX cache not present\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - void *cache_host = memory_region_get_ram_ptr(&fs->cache); - -@@ -56,7 +56,7 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - - if (fd < 0) { - fprintf(stderr, "%s: Bad fd for map\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - - for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { -@@ -78,11 +78,11 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W) ? PROT_WRITE : 0), - MAP_SHARED | MAP_FIXED, - fd, sm->fd_offset[i]) != (cache_host + sm->c_offset[i])) { -+ res = -errno; - fprintf(stderr, "%s: map failed err %d [%d] %" - PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, - errno, i, sm->c_offset[i], sm->len[i], - sm->fd_offset[i]); -- res = -1; - break; - } - } -@@ -91,10 +91,11 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - /* Something went wrong, unmap them all */ - vhost_user_fs_slave_unmap(dev, sm); - } -- return res; -+ return (uint64_t)res; - } - --int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) -+uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, -+ VhostUserFSSlaveMsg *sm) - { - VHostUserFS *fs = VHOST_USER_FS(dev->vdev); - if (!fs) { -@@ -114,7 +115,7 @@ int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) - } - - fprintf(stderr, "%s: unmap when DAX cache not present\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - void *cache_host = memory_region_get_ram_ptr(&fs->cache); - -@@ -148,26 +149,27 @@ int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) - ptr = mmap(cache_host + sm->c_offset[i], sm->len[i], DAX_WINDOW_PROT, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - if (ptr != (cache_host + sm->c_offset[i])) { -+ res = -errno; - fprintf(stderr, "%s: mmap failed (%s) [%d] %" - PRIx64 "+%" PRIx64 " from %" PRIx64 " res: %p\n", - __func__, - strerror(errno), - i, sm->c_offset[i], sm->len[i], - sm->fd_offset[i], ptr); -- res = -1; - } - } - -- return res; -+ return (uint64_t)res; - } - --int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) -+uint64_t vhost_user_fs_slave_sync(struct vhost_dev *dev, -+ VhostUserFSSlaveMsg *sm) - { - VHostUserFS *fs = VHOST_USER_FS(dev->vdev); - size_t cache_size = fs->conf.cache_size; - if (!cache_size) { - fprintf(stderr, "%s: sync when DAX cache not present\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - void *cache_host = memory_region_get_ram_ptr(&fs->cache); - -@@ -191,26 +193,26 @@ int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) - - if (msync(cache_host + sm->c_offset[i], sm->len[i], - MS_SYNC /* ?? */)) { -+ res = -errno; - fprintf(stderr, "%s: msync failed (%s) [%d] %" - PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, - strerror(errno), - i, sm->c_offset[i], sm->len[i], - sm->fd_offset[i]); -- res = -1; - } - } - -- return res; -+ return (uint64_t)res; - } - --int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -- int fd) -+uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -+ int fd) - { - VHostUserFS *fs = VHOST_USER_FS(dev->vdev); - if (!fs) { - /* Shouldn't happen - but seen it in error paths */ - fprintf(stderr, "%s: Bad fs ptr\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - - unsigned int i; -@@ -219,7 +221,7 @@ int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - - if (fd < 0) { - fprintf(stderr, "%s: Bad fd for map\n", __func__); -- return -1; -+ return (uint64_t)-1; - } - - for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES && !res; i++) { -@@ -285,7 +287,7 @@ int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - * TODO! We should be returning 'done' if possible but our error handling - * doesn't know about that yet. - */ -- return res; -+ return (uint64_t)res; - } - - static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index b4ef0102ad..d95dbc39e3 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1325,24 +1325,25 @@ static int vhost_user_reset_device(struct vhost_dev *dev) - return 0; - } - --static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) -+static uint64_t vhost_user_slave_handle_config_change(struct vhost_dev *dev) - { - int ret = -1; - - if (!dev->config_ops) { -- return -1; -+ return true; - } - - if (dev->config_ops->vhost_dev_config_notifier) { - ret = dev->config_ops->vhost_dev_config_notifier(dev); - } - -- return ret; -+ return !!ret; - } - --static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, -- VhostUserVringArea *area, -- int fd) -+static uint64_t vhost_user_slave_handle_vring_host_notifier( -+ struct vhost_dev *dev, -+ VhostUserVringArea *area, -+ int fd) - { - int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; - size_t page_size = qemu_real_host_page_size; -@@ -1356,7 +1357,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, - if (!virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || - vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { -- return -1; -+ return true; - } - - n = &user->notifier[queue_idx]; -@@ -1369,18 +1370,18 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, - } - - if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { -- return 0; -+ return false; - } - - /* Sanity check. */ - if (area->size != page_size) { -- return -1; -+ return true; - } - - addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, area->offset); - if (addr == MAP_FAILED) { -- return -1; -+ return true; - } - - name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", -@@ -1391,13 +1392,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, - - if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { - munmap(addr, page_size); -- return -1; -+ return true; - } - - n->addr = addr; - n->set = true; - -- return 0; -+ return false; - } - - static void slave_read(void *opaque) -@@ -1406,7 +1407,8 @@ static void slave_read(void *opaque) - struct vhost_user *u = dev->opaque; - VhostUserHeader hdr = { 0, }; - VhostUserPayload payload = { 0, }; -- int size, ret = 0; -+ int size; -+ uint64_t ret = 0; - struct iovec iov; - struct msghdr msgh; - int fd[VHOST_USER_SLAVE_MAX_FDS]; -@@ -1494,7 +1496,7 @@ static void slave_read(void *opaque) - #endif - default: - error_report("Received unexpected msg type: %d.", hdr.request); -- ret = -EINVAL; -+ ret = (uint64_t)-EINVAL; - } - - /* Close the remaining file descriptors. */ -@@ -1515,7 +1517,7 @@ static void slave_read(void *opaque) - hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; - hdr.flags |= VHOST_USER_REPLY_MASK; - -- payload.u64 = !!ret; -+ payload.u64 = ret; - hdr.size = sizeof(payload.u64); - - iovec[0].iov_base = &hdr; -diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h -index 8a6f8e2a7a..64ac6b6444 100644 ---- a/include/hw/virtio/vhost-backend.h -+++ b/include/hw/virtio/vhost-backend.h -@@ -186,7 +186,7 @@ int vhost_backend_update_device_iotlb(struct vhost_dev *dev, - int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, - uint64_t iova, uint64_t len); - --int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, -+uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, - struct vhost_iotlb_msg *imsg); - - int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 0750687463..845cdb0177 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -64,10 +64,13 @@ struct VHostUserFS { - }; - - /* Callbacks from the vhost-user code for slave commands */ --int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -- int fd); --int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); --int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); --int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, int fd); -+uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, -+ int fd); -+uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, -+ VhostUserFSSlaveMsg *sm); -+uint64_t vhost_user_fs_slave_sync(struct vhost_dev *dev, -+ VhostUserFSSlaveMsg *sm); -+uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, -+ VhostUserFSSlaveMsg *sm, int fd); - - #endif /* _QEMU_VHOST_USER_FS_H */ --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0024-DAX-virtiofsd-Rework-fs-cache-request-error-path.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0024-DAX-virtiofsd-Rework-fs-cache-request-error-path.patch deleted file mode 100644 index d8f94aeb03..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0024-DAX-virtiofsd-Rework-fs-cache-request-error-path.patch +++ /dev/null @@ -1,240 +0,0 @@ -From 5e0e90706b03fa71072b6b17779e0a66cb14aa64 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 21 May 2019 15:10:05 +0100 -Subject: [PATCH 24/29] DAX: virtiofsd: Rework fs-cache-request error path - -Rework error values all the way back to the guest for IO requests. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/virtio/vhost-user-fs.c | 9 +++-- - subprojects/libvhost-user/libvhost-user.c | 18 ++++++---- - subprojects/libvhost-user/libvhost-user.h | 6 ++-- - tools/virtiofsd/fuse_lowlevel.h | 11 ++++--- - tools/virtiofsd/fuse_virtio.c | 40 +++++++++++------------ - 5 files changed, 45 insertions(+), 39 deletions(-) - -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index c02dcaeca7..b43725824f 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -283,11 +283,10 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, - close(fd); - - trace_vhost_user_fs_slave_io_exit(res, done); -- /* -- * TODO! We should be returning 'done' if possible but our error handling -- * doesn't know about that yet. -- */ -- return (uint64_t)res; -+ if (res < 0) { -+ return (uint64_t)res; -+ } -+ return (uint64_t)done; - } - - static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) -diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c -index a1cbb626d2..4cf4aef63d 100644 ---- a/subprojects/libvhost-user/libvhost-user.c -+++ b/subprojects/libvhost-user/libvhost-user.c -@@ -2919,8 +2919,8 @@ vu_queue_push(VuDev *dev, VuVirtq *vq, - vu_queue_inflight_post_put(dev, vq, elem->index); - } - --bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, -- VhostUserFSSlaveMsg *fsm) -+int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, -+ VhostUserFSSlaveMsg *fsm) - { - int fd_num = 0; - bool res; -@@ -2939,18 +2939,24 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, - vmsg.fd_num = fd_num; - - if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { -- return false; -+ return -EINVAL; - } - - pthread_mutex_lock(&dev->slave_mutex); - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { - pthread_mutex_unlock(&dev->slave_mutex); -- return false; -+ return -EIO; - } - - /* Also unlocks the slave_mutex */ - res = vu_process_message_reply(dev, &vmsg, &payload); -- res = res && (payload == 0); -- return res; -+ if (!res) { -+ return -EIO; -+ } -+ /* -+ * Payload is delivered as uint64_t but is actually signed for -+ * errors. -+ */ -+ return (int64_t)payload; - } - -diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h -index 4b6e681a3e..ee75d4931f 100644 ---- a/subprojects/libvhost-user/libvhost-user.h -+++ b/subprojects/libvhost-user/libvhost-user.h -@@ -723,9 +723,9 @@ bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, - * @fd: an fd (only required for map, else must be -1) - * @fsm: The body of the message - * -- * Returns: true if the reply was 0 -+ * Returns: 0 or above for success, negative errno on error - */ --bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, -- VhostUserFSSlaveMsg *fsm); -+int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, -+ VhostUserFSSlaveMsg *fsm); - - #endif /* LIBVHOST_USER_H */ -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index e543f64177..a36a893871 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1998,7 +1998,7 @@ int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); - * @param fd The fd to map - * @return Zero on success - */ --int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); -+int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); - - /** - * For use with virtio-fs; request unmapping of part of the cache -@@ -2007,7 +2007,7 @@ int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); - * @param msg A set of unmapping requests - * @return Zero on success - */ --int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); -+int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); - - /** - * For use with virtio-fs; request synchronisation of part of the cache -@@ -2017,7 +2017,7 @@ int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); - * @param msg A set of syncing requests - * @return Zero on success - */ --int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); -+int64_t fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); - - /** - * For use with virtio-fs; request IO directly to memory -@@ -2025,9 +2025,10 @@ int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); - * @param se The current session - * @param msg A set of IO requests - * @param fd The fd to map -- * @return Zero on success -+ * @return Length on success, negative errno on error - */ --int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd); -+int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, -+ int fd); - - /** - * For use with virtio-fs; wrapper for fuse_virtio_io for writes -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 416d285844..9577eaa68d 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -408,13 +408,13 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - if (len < msg.len[0]) { - msg.len[0] = len; - } -- bool req_res = !fuse_virtio_io(se, &msg, buf->buf[0].fd); -+ int64_t req_res = fuse_virtio_io(se, &msg, buf->buf[0].fd); - fuse_log(FUSE_LOG_DEBUG, - "%s: bad loop; len=%zd bad_in_num=%d fd_offset=%zd " -- "c_offset=%p req_res=%d\n", -+ "c_offset=%p req_res=%ld\n", - __func__, len, bad_in_num, buf->buf[0].pos, - in_sg_ptr[0].iov_base, req_res); -- if (req_res) { -+ if (req_res > 0) { - len -= msg.len[0]; - buf->buf[0].pos += msg.len[0]; - in_sg_ptr++; -@@ -422,7 +422,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - } else if (req_res == 0) { - break; - } else { -- ret = EIO; -+ ret = req_res; - free(in_sg_cpy); - goto err; - } -@@ -1155,40 +1155,41 @@ void virtio_session_close(struct fuse_session *se) - se->virtio_dev = NULL; - } - --int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd) -+int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd) - { - if (!req->se->virtio_dev) { - return -ENODEV; - } -- return !vu_fs_cache_request(&req->se->virtio_dev->dev, -- VHOST_USER_SLAVE_FS_MAP, fd, msg); -+ return vu_fs_cache_request(&req->se->virtio_dev->dev, -+ VHOST_USER_SLAVE_FS_MAP, fd, msg); - } - --int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) -+int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) - { - if (!se->virtio_dev) { - return -ENODEV; - } -- return !vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, -- -1, msg); -+ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, -+ -1, msg); - } - --int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) -+int64_t fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) - { - if (!req->se->virtio_dev) { - return -ENODEV; - } -- return !vu_fs_cache_request(&req->se->virtio_dev->dev, -- VHOST_USER_SLAVE_FS_SYNC, -1, msg); -+ return vu_fs_cache_request(&req->se->virtio_dev->dev, -+ VHOST_USER_SLAVE_FS_SYNC, -1, msg); - } - --int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd) -+int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, -+ int fd) - { - if (!se->virtio_dev) { - return -ENODEV; - } -- return !vu_fs_cache_request(&se->virtio_dev->dev, -- VHOST_USER_SLAVE_FS_IO, fd, msg); -+ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_IO, fd, -+ msg); - } - - /* -@@ -1214,8 +1215,7 @@ ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, - msg.len[0] = len; - msg.flags[0] = VHOST_USER_FS_FLAG_MAP_W; - -- bool result = !fuse_virtio_io(req->se, &msg, dst->fd); -- /* TODO: Rework the result path to actually get length/error */ -- fuse_log(FUSE_LOG_DEBUG, "%s: result=%d\n", __func__, result); -- return result ? len : -EIO; -+ int64_t result = fuse_virtio_io(req->se, &msg, dst->fd); -+ fuse_log(FUSE_LOG_DEBUG, "%s: result=%ld\n", __func__, result); -+ return result; - } --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0025-DAX-virtiofsd-make-FUSE_REMOVEMAPPING-support-multip.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0025-DAX-virtiofsd-make-FUSE_REMOVEMAPPING-support-multip.patch deleted file mode 100644 index db0596b2d6..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0025-DAX-virtiofsd-make-FUSE_REMOVEMAPPING-support-multip.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 0946e9a802943443333eb7e8c6a0989f37c236a5 Mon Sep 17 00:00:00 2001 -From: Peng Tao -Date: Mon, 3 Jun 2019 10:47:19 +0800 -Subject: [PATCH 25/29] DAX: virtiofsd: make FUSE_REMOVEMAPPING support - multiple entries - -The fuse wire protocol is changed so that we can unmap multiple -mappings in a single call. - -Signed-off-by: Peng Tao -fix by: Catherine Ho ---- - tools/virtiofsd/fuse_lowlevel.c | 5 +++-- - tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++-------- - 2 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index a2480d4aa1..99ba000c2e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1920,12 +1920,13 @@ static void do_removemapping(fuse_req_t req, fuse_ino_t nodeid, - struct fuse_removemapping_one *one; - - arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -- if (!arg) { -+ if (!arg || arg->count <= 0) { -+ fuse_log(FUSE_LOG_ERR, "do_removemapping: invalid arg %p\n", arg); - fuse_reply_err(req, EINVAL); - return; - } - -- one = fuse_mbuf_iter_advance(iter, sizeof(*one)); -+ one = fuse_mbuf_iter_advance(iter, arg->count * sizeof(*one)); - if (!one) { - fuse_log( - FUSE_LOG_ERR, -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ab33fabcda..3af55ffb8a 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2965,14 +2965,24 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se, - VhostUserFSSlaveMsg msg = { 0 }; - int ret = 0; - -- msg.len[0] = argp->len; -- msg.c_offset[0] = argp->moffset; -- if (fuse_virtio_unmap(se, &msg)) { -- fprintf(stderr, -- "%s: unmap over virtio failed " -- "(offset=0x%lx, len=0x%lx)\n", -- __func__, argp->moffset, argp->len); -- ret = EINVAL; -+ for (int i = 0; num > 0; i++, argp++) { -+ msg.len[i] = argp->len; -+ msg.c_offset[i] = argp->moffset; -+ -+ if (--num == 0 || i == VHOST_USER_FS_SLAVE_ENTRIES - 1) { -+ if (fuse_virtio_unmap(se, &msg)) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: unmap over virtio failed " -+ "(offset=0x%lx, len=0x%lx)\n", -+ __func__, argp->moffset, argp->len); -+ ret = EINVAL; -+ break; -+ } -+ if (num > 0) { -+ i = 0; -+ memset(&msg, 0, sizeof(msg)); -+ } -+ } - } - - fuse_reply_err(req, ret); --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0027-virtiofsd-add-initial-support-for-shared-versions.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0027-virtiofsd-add-initial-support-for-shared-versions.patch deleted file mode 100644 index 9505b723b5..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0027-virtiofsd-add-initial-support-for-shared-versions.patch +++ /dev/null @@ -1,776 +0,0 @@ -From a0cbb60bb58ffaf2ae771c7822f0cb25762076fa Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Wed, 20 Nov 2019 14:27:19 +0000 -Subject: [PATCH 27/29] virtiofsd: add initial support for shared versions - -Not backward compatible with previous kernels, so please only use with -kernel that has version table support (this will need to be cleaned up). - -No READDIRPLUS support in the kernel for versioned entries, so disable for -now. - -Attribute timeout is set to "infinity", so changes to underlying filesystem -won't be visible. This also needs to be fixed, but is best for testing the -versioning since the shared version is the only thing that will force -refreshing metadata and dcache lookups. - -No caching metadata modifications yet. - -Start "ireg" daemon before starting any fuse servers. - -Signed-off-by: Miklos Szeredi -Fix by: -Signed-off-by: Liu Bo -Only send entryver_out when shared is enabled by: -With help message update from: -Signed-off-by: Xiao Yang - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Stefan Hajnoczi ---- - include/standard-headers/linux/fuse.h | 5 + - tools/virtiofsd/fuse_lowlevel.c | 36 ++- - tools/virtiofsd/fuse_lowlevel.h | 9 +- - tools/virtiofsd/helper.c | 4 + - tools/virtiofsd/ireg.h | 33 +++ - tools/virtiofsd/passthrough_ll.c | 321 +++++++++++++++++++++++++- - 6 files changed, 387 insertions(+), 21 deletions(-) - create mode 100644 tools/virtiofsd/ireg.h - -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -index 82c0a38b59..fbced7caef 100644 ---- a/include/standard-headers/linux/fuse.h -+++ b/include/standard-headers/linux/fuse.h -@@ -510,6 +510,11 @@ struct fuse_entry_out { - struct fuse_attr attr; - }; - -+struct fuse_entryver_out { -+ uint64_t version_index; -+ int64_t initial_version; -+}; -+ - struct fuse_forget_in { - uint64_t nlookup; - }; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index d6256f571b..47231378db 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -389,28 +389,46 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) - } - } - --int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e, -+ bool shared) - { -- struct fuse_entry_out arg; -- size_t size = sizeof(arg); -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_entryver_out)]; -+ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; -+ struct fuse_entryver_out *ever = -+ (struct fuse_entryver_out *)(buf + sizeof(struct fuse_entry_out)); -+ size_t size = sizeof(buf); - -- memset(&arg, 0, sizeof(arg)); -- fill_entry(&arg, e); -- return send_reply_ok(req, &arg, size); -+ if ((req->se->conn.proto_minor >= 9) && !shared) { -+ size -= sizeof(struct fuse_entryver_out); -+ } -+ -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ ever->initial_version = e->initial_version; -+ ever->version_index = e->version_offset; -+ return send_reply_ok(req, buf, size); - } - - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *f) -+ const struct fuse_file_info *f, bool shared) - { -- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out) + -+ sizeof(struct fuse_entryver_out)]; - size_t entrysize = sizeof(struct fuse_entry_out); - struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; - struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); -+ struct fuse_entryver_out *ever = -+ (struct fuse_entryver_out *)(buf + entrysize + -+ sizeof(struct fuse_open_out)); - - memset(buf, 0, sizeof(buf)); - fill_entry(earg, e); - fill_open(oarg, f); -- return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); -+ ever->initial_version = e->initial_version; -+ ever->version_index = e->version_offset; -+ return send_reply_ok(req, buf, -+ entrysize + sizeof(struct fuse_open_out) + -+ (shared ? sizeof(struct fuse_entryver_out) : 0)); - } - - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index a36a893871..5f60e3fd2c 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -26,6 +26,7 @@ - #include "fuse_common.h" - #include "standard-headers/linux/fuse.h" - -+#include - #include - #include - #include -@@ -104,6 +105,9 @@ struct fuse_entry_param { - * Flags for fuse_attr.flags that do not fit into attr. - */ - uint32_t attr_flags; -+ -+ uint64_t version_offset; -+ int64_t initial_version; - }; - - /** -@@ -1294,7 +1298,8 @@ void fuse_reply_none(fuse_req_t req); - * @param e the entry parameters - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e, -+ bool shared); - - /** - * Reply with a directory entry and open parameters -@@ -1314,7 +1319,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *fi); -+ const struct fuse_file_info *fi, bool shared); - - /** - * Reply with attributes -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 28243b51b2..29331ec2fc 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -174,6 +174,10 @@ void fuse_cmdline_help(void) - " default: no_xattr\n" - " -o modcaps=CAPLIST Modify the list of capabilities\n" - " e.g. -o modcaps=+sys_admin:-chown\n" -+ " -o shared|no_shared enable/disable shared cache\n" -+ " default: no_shared\n" -+ " please start 'ireg' daemon before " -+ " using shared cache\n" - " --rlimit-nofile= set maximum number of file descriptors\n" - " (0 leaves rlimit unchanged)\n" - " default: min(1000000, fs.file-max - 16384)\n" -diff --git a/tools/virtiofsd/ireg.h b/tools/virtiofsd/ireg.h -new file mode 100644 -index 0000000000..91c0f386d7 ---- /dev/null -+++ b/tools/virtiofsd/ireg.h -@@ -0,0 +1,33 @@ -+#define VERSION_TABLE_MAGIC 0x7265566465726853 -+ -+enum ireg_op { -+ IREG_GET, -+ IREG_PUT, -+}; -+ -+struct ireg_msg { -+ enum ireg_op op; -+ uint64_t handle; -+ union { -+ struct { -+ uint64_t ino; -+ uint64_t dev; -+ } get; -+ struct { -+ uint64_t refid; -+ } put; -+ }; -+}; -+ -+enum srv_op { -+ SRV_VERSION, -+}; -+ -+struct srv_msg { -+ enum srv_op op; -+ uint64_t handle; -+ struct { -+ uint64_t refid; -+ uint64_t offset; -+ } version; -+}; -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 3af55ffb8a..52a52b2dd7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -44,16 +44,21 @@ - #include - #include - #include -+#include - #include -+#include - #include - #include - #include -+#include - #include -+#include - #include - #include - #include - - #include "qemu/cutils.h" -+#include "ireg.h" - #include "passthrough_helpers.h" - #include "passthrough_seccomp.h" - -@@ -110,6 +115,8 @@ struct lo_inode { - */ - uint64_t nlookup; - -+ uint64_t version_offset; -+ uint64_t ireg_refid; - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -@@ -152,12 +159,16 @@ struct lo_data { - char *modcaps; - double timeout; - int cache; -+ int shared; - int timeout_set; - int readdirplus_set; - int readdirplus_clear; - int allow_direct_io; - int announce_submounts; - bool use_statx; -+ int ireg_sock; -+ int64_t *version_table; -+ uint64_t version_table_size; - struct lo_inode root; - GHashTable *inodes; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ -@@ -193,6 +204,8 @@ static const struct fuse_opt lo_opts[] = { - { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE }, - { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ { "shared", offsetof(struct lo_data, shared), 1 }, -+ { "no_shared", offsetof(struct lo_data, shared), 0 }, - { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, - { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, - { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, -@@ -204,6 +217,7 @@ static bool use_syslog = false; - static int current_log_level; - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - uint64_t n); -+static void put_shared(struct lo_data *lo, struct lo_inode *inode); - - static struct { - pthread_mutex_t mutex; -@@ -512,6 +526,7 @@ static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) - - if (g_atomic_int_dec_and_test(&inode->refcount)) { - close(inode->fd); -+ put_shared(lo, inode); - free(inode); - } - } -@@ -587,8 +602,9 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - } - } - -+ /* TODO: shared version support for readdirplus */ - if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || -- lo->readdirplus_clear) { -+ lo->readdirplus_clear || lo->shared) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; - } -@@ -600,6 +616,29 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - } - } - -+static int64_t *version_ptr(struct lo_data *lo, struct lo_inode *inode) -+{ -+ return lo->version_table + inode->version_offset; -+} -+ -+static int64_t get_version(struct lo_data *lo, struct lo_inode *inode) -+{ -+ if (!inode->version_offset) { -+ return 0; -+ } -+ -+ return __atomic_load_8(version_ptr(lo, inode), __ATOMIC_SEQ_CST); -+} -+ -+static void update_version(struct lo_data *lo, struct lo_inode *inode) -+{ -+ if (!inode->version_offset) { -+ return; -+ } -+ -+ __atomic_add_fetch(version_ptr(lo, inode), 1, __ATOMIC_SEQ_CST); -+} -+ - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -@@ -731,6 +770,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - goto out_err; - } - } -+ update_version(lo, inode); - lo_inode_put(lo, &inode); - - return lo_getattr(req, ino, fi); -@@ -763,6 +803,74 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, - return p; - } - -+struct msgreply { -+ struct lo_inode *inode; -+ sem_t ready; -+}; -+ -+static void get_shared(struct lo_data *lo, struct lo_inode *inode) -+{ -+ int res; -+ struct msgreply rep = { -+ .inode = inode, -+ }; -+ struct ireg_msg msg = { -+ .op = IREG_GET, -+ .handle = (uintptr_t) &rep, -+ .get = { -+ .ino = inode->key.ino, -+ .dev = inode->key.dev, -+ }, -+ }; -+ -+ if (lo->ireg_sock == -1) { -+ inode->version_offset = 0; -+ return; -+ } -+ -+ sem_init(&rep.ready, 0, 0); -+ -+ res = write(lo->ireg_sock, &msg, sizeof(msg)); -+ if (res != sizeof(msg)) { -+ if (res == -1) { -+ fuse_log(FUSE_LOG_WARNING, -+ "write(lo->ireg_sock, {IREG_GET, ...}): %m\n"); -+ } else { -+ fuse_log(FUSE_LOG_WARNING, "short write to ireg_sock: %i\n", res); -+ } -+ return; -+ } -+ -+ while (sem_wait(&rep.ready)) { -+ ; -+ } -+ sem_destroy(&rep.ready); -+} -+ -+static void put_shared(struct lo_data *lo, struct lo_inode *inode) -+{ -+ int res; -+ struct ireg_msg msg = { -+ .op = IREG_PUT, -+ .put.refid = inode->ireg_refid, -+ }; -+ -+ if (lo->ireg_sock == -1) { -+ return; -+ } -+ -+ res = write(lo->ireg_sock, &msg, sizeof(msg)); -+ if (res != sizeof(msg)) { -+ if (res == -1) { -+ fuse_log(FUSE_LOG_WARNING, -+ "write(lo->ireg_sock, {IREG_PUT, ...}): %m\n"); -+ } else { -+ fuse_log(FUSE_LOG_WARNING, "short write to ireg_sock: %i\n", res); -+ } -+ return; -+ } -+} -+ - /* value_destroy_func for posix_locks GHashTable */ - static void posix_locks_value_destroy(gpointer data) - { -@@ -908,16 +1016,30 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); - } - pthread_mutex_lock(&lo->mutex); -+ get_shared(lo, inode); - inode->fuse_ino = lo_add_inode_mapping(req, inode); - g_hash_table_insert(lo->inodes, &inode->key, inode); - pthread_mutex_unlock(&lo->mutex); - } -+ -+ e->initial_version = get_version(lo, inode); -+ res = fstatat(inode->fd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ saverr = errno; -+ unref_inode_lolocked(lo, inode, 1); -+ errno = saverr; -+ goto out_err; -+ } -+ - e->ino = inode->fuse_ino; -+ e->version_offset = inode->version_offset; - lo_inode_put(lo, &inode); - lo_inode_put(lo, &dir); - -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -- name, (unsigned long long)e->ino); -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli (version_table[%lli]=%lli)\n", -+ (unsigned long long)parent, name, (unsigned long long)e->ino, -+ (unsigned long long)e->version_offset, -+ (unsigned long long)e->initial_version); - - return 0; - -@@ -952,7 +1074,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - if (err) { - fuse_reply_err(req, err); - } else { -- fuse_reply_entry(req, &e); -+ fuse_reply_entry(req, &e, lo_data(req)->shared); - } - } - -@@ -1056,6 +1178,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - goto out; - } - -+ update_version(lo, dir); -+ - saverr = lo_do_lookup(req, parent, name, &e); - if (saverr) { - goto out; -@@ -1064,7 +1188,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, - name, (unsigned long long)e.ino); - -- fuse_reply_entry(req, &e); -+ fuse_reply_entry(req, &e, lo->shared); - lo_inode_put(lo, &dir); - return; - -@@ -1134,11 +1258,13 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - inode->nlookup++; - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; -+ update_version(lo, inode); -+ update_version(lo, parent_inode); - - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, - name, (unsigned long long)e.ino); - -- fuse_reply_entry(req, &e); -+ fuse_reply_entry(req, &e, lo->shared); - lo_inode_put(lo, &parent_inode); - lo_inode_put(lo, &inode); - return; -@@ -1192,8 +1318,21 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - } - - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ if (res == -1) { -+ fuse_reply_err(req, errno); -+ } else { -+ struct lo_inode *parent_inode; - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ update_version(lo, inode); -+ -+ parent_inode = lo_inode(req, parent); -+ if (parent_inode) { -+ update_version(lo, parent_inode); -+ lo_inode_put(lo, &parent_inode); -+ } -+ -+ fuse_reply_err(req, 0); -+ } - unref_inode_lolocked(lo, inode, 1); - lo_inode_put(lo, &inode); - } -@@ -1245,8 +1384,18 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); -+ if (res == -1) { -+ fuse_reply_err(req, errno); -+ } else { -+ update_version(lo, oldinode); -+ if (newinode) { -+ update_version(lo, newinode); -+ } -+ update_version(lo, parent_inode); -+ update_version(lo, newparent_inode); -+ fuse_reply_err(req, 0); -+ } - -- fuse_reply_err(req, res == -1 ? errno : 0); - out: - unref_inode_lolocked(lo, oldinode, 1); - unref_inode_lolocked(lo, newinode, 1); -@@ -1274,8 +1423,21 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - } - - res = unlinkat(lo_fd(req, parent), name, 0); -+ if (res == -1) { -+ fuse_reply_err(req, errno); -+ } else { -+ struct lo_inode *parent_inode; - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ update_version(lo, inode); -+ -+ parent_inode = lo_inode(req, parent); -+ if (parent_inode) { -+ update_version(lo, parent_inode); -+ lo_inode_put(lo, &parent_inode); -+ } -+ -+ fuse_reply_err(req, 0); -+ } - unref_inode_lolocked(lo, inode, 1); - lo_inode_put(lo, &inode); - } -@@ -1690,6 +1852,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - if (!err) { - ssize_t fh; - -+ update_version(lo, parent_inode); -+ - pthread_mutex_lock(&lo->mutex); - fh = lo_add_fd_mapping(req, fd); - pthread_mutex_unlock(&lo->mutex); -@@ -1714,7 +1878,7 @@ out: - if (err) { - fuse_reply_err(req, err); - } else { -- fuse_reply_create(req, &e, fi); -+ fuse_reply_create(req, &e, fi, lo->shared); - } - } - -@@ -2041,6 +2205,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - (void)ino; - ssize_t res; - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ struct lo_data *lo = lo_data(req); - bool cap_fsetid_dropped = false; - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -@@ -2067,6 +2232,14 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - if (res < 0) { - fuse_reply_err(req, -res); - } else { -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (inode) { -+ update_version(lo, inode); -+ lo_inode_put(lo, &inode); -+ } -+ - fuse_reply_write(req, (size_t)res); - } - -@@ -2095,6 +2268,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - off_t length, struct fuse_file_info *fi) - { - int err = EOPNOTSUPP; -+ struct lo_data *lo = lo_data(req); - (void)ino; - - #ifdef CONFIG_FALLOCATE -@@ -2112,6 +2286,16 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - err = posix_fallocate(lo_fi_fd(req, fi), offset, length); - #endif - -+ if (!err) { -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (inode) { -+ update_version(lo, inode); -+ lo_inode_put(lo, &inode); -+ } -+ } -+ - fuse_reply_err(req, err); - } - -@@ -2754,6 +2938,9 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, - - saverr = ret == -1 ? errno : 0; - -+ if (!saverr) { -+ update_version(lo, inode); -+ } - out: - if (fd >= 0) { - close(fd); -@@ -2820,6 +3007,9 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name) - - saverr = ret == -1 ? errno : 0; - -+ if (!saverr) { -+ update_version(lo, inode); -+ } - out: - if (fd >= 0) { - close(fd); -@@ -3474,6 +3664,101 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - } - -+static void *ireg_do(void *data) -+{ -+ struct lo_data *lo = data; -+ int res; -+ char buf[100]; -+ struct srv_msg reply; -+ struct msgreply *rep; -+ -+ for (;;) { -+ res = read(lo->ireg_sock, buf, sizeof(buf)); -+ if (res <= 0) { -+ if (res == -1) { -+ fuse_log(FUSE_LOG_WARNING, "read(lo->ireg_sock, ...): %m\n"); -+ } else { -+ fuse_log(FUSE_LOG_WARNING, "disconnected from ireg\n"); -+ } -+ return NULL; -+ } -+ if (res != sizeof(reply)) { -+ fuse_log(FUSE_LOG_WARNING, "bad size message: %i\n", res); -+ continue; -+ } -+ -+ memcpy(&reply, buf, sizeof(reply)); -+ if (reply.op != SRV_VERSION) { -+ fuse_log(FUSE_LOG_WARNING, "bad reply to IREG_GET: %i\n", reply.op); -+ continue; -+ } -+ -+ rep = (struct msgreply *)(uintptr_t)reply.handle; -+ rep->inode->version_offset = reply.version.offset; -+ rep->inode->ireg_refid = reply.version.refid; -+ sem_post(&rep->ready); -+ } -+} -+ -+static void setup_shared_versions(struct lo_data *lo) -+{ -+ int fd, sock, res; -+ const char *version_path = "/dev/shm/fuse_shared_versions"; -+ struct stat stat; -+ struct sockaddr_un name = { .sun_family = AF_UNIX }; -+ const char *socket_name = "/tmp/ireg.sock"; -+ void *addr; -+ -+ lo->ireg_sock = -1; -+ if (!lo->shared) { -+ return; -+ } -+ -+ sock = socket(AF_UNIX, SOCK_SEQPACKET, 0); -+ if (sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "socket(AF_UNIX, SOCK_SEQPACKET, 0): %m\n"); -+ exit(1); -+ } -+ -+ strncpy(name.sun_path, socket_name, sizeof(name.sun_path) - 1); -+ -+ res = connect(sock, (const struct sockaddr *)&name, -+ sizeof(struct sockaddr_un)); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_WARNING, "connect to ireg: %m\n"); -+ close(sock); -+ lo->ireg_sock = -1; -+ return; -+ } -+ -+ lo->ireg_sock = sock; -+ -+ fd = open(version_path, O_RDWR); -+ if (sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(%s, O_RDWR): %m\n", version_path); -+ exit(1); -+ } -+ -+ res = fstat(fd, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fstat(%i, &stat): %m\n", fd); -+ exit(1); -+ } -+ -+ lo->version_table_size = stat.st_size / sizeof(lo->version_table[0]); -+ -+ addr = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); -+ if (addr == MAP_FAILED) { -+ fuse_log( -+ FUSE_LOG_ERR, -+ "mmap(NULL, %li, PROT_READ | PROT_WRITE, MAP_SHARED, %i, 0): %m\n", -+ stat.st_size, fd); -+ exit(1); -+ } -+ -+ lo->version_table = addr; -+} -+ - static void setup_root(struct lo_data *lo, struct lo_inode *root) - { - int fd, res; -@@ -3688,6 +3973,7 @@ int main(int argc, char *argv[]) - - lo.use_statx = true; - -+ setup_shared_versions(&lo); - se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); - if (se == NULL) { - goto err_out1; -@@ -3711,9 +3997,24 @@ int main(int argc, char *argv[]) - setup_sandbox(&lo, se, opts.syslog); - - setup_root(&lo, &lo.root); -+ -+ if (lo.ireg_sock != -1) { -+ pthread_t ireg_thread; -+ -+ ret = pthread_create(&ireg_thread, NULL, ireg_do, &lo); -+ if (ret) { -+ fuse_log(FUSE_LOG_WARNING, "pthread_create: %s\n", strerror(ret)); -+ ret = 1; -+ goto err_out4; -+ } -+ -+ get_shared(&lo, &lo.root); -+ } -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - -+err_out4: - fuse_session_unmount(se); - cleanup_capng(); - err_out3: --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0028-virtio-fs-Allow-mapping-of-meta-data-version-table.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0028-virtio-fs-Allow-mapping-of-meta-data-version-table.patch deleted file mode 100644 index c4af4e2149..0000000000 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0028-virtio-fs-Allow-mapping-of-meta-data-version-table.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 119990ab3a30564c7e44f4e39344be48fc998f26 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 27 Jul 2018 10:36:41 +0100 -Subject: [PATCH 28/29] virtio-fs: Allow mapping of meta data version table - -The 'meta data version table' is a block of shared memory mapped between -multiple QEMUs and fuse daemons, so that they can be informed -of metadata updates. It's typically a shmfs file, and -it's specified as : - - -device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=1G,versiontable=/dev/shm/mdvt1 - -It gets mapped into the PCI bar after the data cache; it's read only. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/virtio/vhost-user-fs-pci.c | 16 +++++++++-- - hw/virtio/vhost-user-fs.c | 32 ++++++++++++++++++++++ - include/hw/virtio/vhost-user-fs.h | 4 +++ - include/standard-headers/linux/virtio_fs.h | 1 + - 4 files changed, 51 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c -index 19aaa8d722..aad0128fa5 100644 ---- a/hw/virtio/vhost-user-fs-pci.c -+++ b/hw/virtio/vhost-user-fs-pci.c -@@ -42,6 +42,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - VHostUserFSPCI *dev = VHOST_USER_FS_PCI(vpci_dev); - DeviceState *vdev = DEVICE(&dev->vdev); - uint64_t cachesize; -+ uint64_t totalsize; - - if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { - /* Also reserve config change and hiprio queue vectors */ -@@ -51,18 +52,29 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - qdev_realize(vdev, BUS(&vpci_dev->bus), errp); - cachesize = dev->vdev.conf.cache_size; - -+ /* PCIe bar needs to be a power of 2 */ -+ totalsize = pow2ceil(cachesize + dev->vdev.mdvt_size); -+ - /* - * The bar starts with the data/DAX cache -- * Others will be added later. -+ * followed by the metadata cache. - */ - memory_region_init(&dev->cachebar, OBJECT(vpci_dev), -- "vhost-fs-pci-cachebar", cachesize); -+ "vhost-fs-pci-cachebar", totalsize); - if (cachesize) { - memory_region_add_subregion(&dev->cachebar, 0, &dev->vdev.cache); - virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, 0, cachesize, - VIRTIO_FS_SHMCAP_ID_CACHE); - } - -+ if (dev->vdev.mdvt_size) { -+ memory_region_add_subregion(&dev->cachebar, cachesize, -+ &dev->vdev.mdvt); -+ virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, -+ cachesize, dev->vdev.mdvt_size, -+ VIRTIO_FS_SHMCAP_ID_VERTAB); -+ } -+ - /* After 'realized' so the memory region exists */ - pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR, - PCI_BASE_ADDRESS_SPACE_MEMORY | -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index b43725824f..fb16db7e0d 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -432,6 +432,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) - unsigned int i; - size_t len; - int ret; -+ int mdvtfd = -1; - - if (!fs->conf.chardev.chr) { - error_setg(errp, "missing chardev"); -@@ -475,6 +476,28 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) - "no smaller than the page size"); - return; - } -+ if (fs->conf.mdvtpath) { -+ struct stat statbuf; -+ -+ mdvtfd = open(fs->conf.mdvtpath, O_RDWR); -+ if (mdvtfd < 0) { -+ error_setg_errno(errp, errno, -+ "Failed to open meta-data version table '%s'", -+ fs->conf.mdvtpath); -+ -+ return; -+ } -+ if (fstat(mdvtfd, &statbuf) == -1) { -+ error_setg_errno(errp, errno, -+ "Failed to stat meta-data version table '%s'", -+ fs->conf.mdvtpath); -+ close(mdvtfd); -+ return; -+ } -+ -+ fs->mdvt_size = statbuf.st_size; -+ } -+ - if (fs->conf.cache_size) { - /* Anonymous, private memory is not counted as overcommit */ - cache_ptr = mmap(NULL, fs->conf.cache_size, DAX_WINDOW_PROT, -@@ -489,6 +512,14 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) - fs->conf.cache_size, cache_ptr); - } - -+ if (mdvtfd) { -+ memory_region_init_ram_from_fd(&fs->mdvt, OBJECT(vdev), -+ "virtio-fs-mdvt", -+ fs->mdvt_size, true, mdvtfd, NULL); -+ /* The version table is read-only by the guest */ -+ memory_region_set_readonly(&fs->mdvt, true); -+ } -+ - if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) { - return; - } -@@ -564,6 +595,7 @@ static Property vuf_properties[] = { - conf.num_request_queues, 1), - DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), - DEFINE_PROP_SIZE("cache-size", VHostUserFS, conf.cache_size, 0), -+ DEFINE_PROP_STRING("versiontable", VHostUserFS, conf.mdvtpath), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 845cdb0177..83015ac0fc 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -47,6 +47,7 @@ typedef struct { - uint16_t num_request_queues; - uint16_t queue_size; - uint64_t cache_size; -+ char *mdvtpath; - } VHostUserFSConf; - - struct VHostUserFS { -@@ -61,6 +62,9 @@ struct VHostUserFS { - - /*< public >*/ - MemoryRegion cache; -+ /* Metadata version table */ -+ size_t mdvt_size; -+ MemoryRegion mdvt; - }; - - /* Callbacks from the vhost-user code for slave commands */ -diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h -index 808aa3a402..a17b5172a8 100644 ---- a/include/standard-headers/linux/virtio_fs.h -+++ b/include/standard-headers/linux/virtio_fs.h -@@ -18,6 +18,7 @@ struct virtio_fs_config { - - /* For the id field in virtio_pci_shm_cap */ - #define VIRTIO_FS_SHMCAP_ID_CACHE 0 -+#define VIRTIO_FS_SHMCAP_ID_VERTAB 1 - - #define VIRTIO_FS_PCI_CACHE_BAR 2 - --- -2.25.1 - diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0001-DAX-vhost-user-Rework-slave-return-values.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0001-DAX-vhost-user-Rework-slave-return-values.patch new file mode 100644 index 0000000000..9219c63a71 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0001-DAX-vhost-user-Rework-slave-return-values.patch @@ -0,0 +1,188 @@ +From 922567a721f34a6e138d2b0e31d98da24211fce9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 18:08:41 +0100 +Subject: [PATCH 01/25] DAX: vhost-user: Rework slave return values + +All the current slave handlers on the qemu side generate an 'int' +return value that's squashed down to a bool (!!ret) and stuffed into +a uint64_t (field of a union) to be returned. + +Move the uint64_t type back up through the individual handlers so +that we can make one actually return a full uint64_t. + +Note that the definition in the interop spec says most of these +cases are defined as returning 0 on success and non-0 for failure, +so it's OK to change from a bool to another non-0. + +Vivek: +This is needed because upcoming patches in series will add new functions +which want to return full error code. Existing functions continue to +return true/false so, it should not lead to change of behavior for +existing users. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Greg Kurz +--- + hw/virtio/vhost-backend.c | 16 ++++++++-------- + hw/virtio/vhost-user.c | 29 +++++++++++++++-------------- + include/hw/virtio/vhost-backend.h | 2 +- + 3 files changed, 24 insertions(+), 23 deletions(-) + +diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c +index 594d770b75..7568eccc7c 100644 +--- a/hw/virtio/vhost-backend.c ++++ b/hw/virtio/vhost-backend.c +@@ -403,31 +403,31 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + return -ENODEV; + } + +-int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, +- struct vhost_iotlb_msg *imsg) ++uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, ++ struct vhost_iotlb_msg *imsg) + { +- int ret = 0; ++ uint64_t ret = 0; + + if (unlikely(!dev->vdev)) { + error_report("Unexpected IOTLB message when virtio device is stopped"); +- return -EINVAL; ++ return EINVAL; + } + + switch (imsg->type) { + case VHOST_IOTLB_MISS: +- ret = vhost_device_iotlb_miss(dev, imsg->iova, +- imsg->perm != VHOST_ACCESS_RO); ++ ret = -vhost_device_iotlb_miss(dev, imsg->iova, ++ imsg->perm != VHOST_ACCESS_RO); + break; + case VHOST_IOTLB_ACCESS_FAIL: + /* FIXME: report device iotlb error */ + error_report("Access failure IOTLB message type not supported"); +- ret = -ENOTSUP; ++ ret = ENOTSUP; + break; + case VHOST_IOTLB_UPDATE: + case VHOST_IOTLB_INVALIDATE: + default: + error_report("Unexpected IOTLB message type"); +- ret = -EINVAL; ++ ret = EINVAL; + break; + } + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index aec6cc1990..cdbdb5b469 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1409,24 +1409,25 @@ static int vhost_user_reset_device(struct vhost_dev *dev) + return 0; + } + +-static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) ++static uint64_t vhost_user_slave_handle_config_change(struct vhost_dev *dev) + { +- int ret = -1; ++ uint64_t ret = 1; + + if (!dev->config_ops) { +- return -1; ++ return 1; + } + + if (dev->config_ops->vhost_dev_config_notifier) { +- ret = dev->config_ops->vhost_dev_config_notifier(dev); ++ ret = -dev->config_ops->vhost_dev_config_notifier(dev); + } + + return ret; + } + +-static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, +- VhostUserVringArea *area, +- int fd) ++static uint64_t vhost_user_slave_handle_vring_host_notifier( ++ struct vhost_dev *dev, ++ VhostUserVringArea *area, ++ int fd) + { + int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; + size_t page_size = qemu_real_host_page_size; +@@ -1440,7 +1441,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || + vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { +- return -1; ++ return 1; + } + + n = &user->notifier[queue_idx]; +@@ -1458,13 +1459,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, + + /* Sanity check. */ + if (area->size != page_size) { +- return -1; ++ return 1; + } + + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, area->offset); + if (addr == MAP_FAILED) { +- return -1; ++ return 1; + } + + name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", +@@ -1475,7 +1476,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, + + if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { + munmap(addr, page_size); +- return -1; ++ return 1; + } + + n->addr = addr; +@@ -1502,7 +1503,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + VhostUserPayload payload = { 0, }; + Error *local_err = NULL; + gboolean rc = G_SOURCE_CONTINUE; +- int ret = 0; ++ uint64_t ret = 0; + struct iovec iov; + g_autofree int *fd = NULL; + size_t fdsize = 0; +@@ -1543,7 +1544,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + break; + default: + error_report("Received unexpected msg type: %d.", hdr.request); +- ret = -EINVAL; ++ ret = EINVAL; + } + + /* +@@ -1557,7 +1558,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr.flags |= VHOST_USER_REPLY_MASK; + +- payload.u64 = !!ret; ++ payload.u64 = ret; + hdr.size = sizeof(payload.u64); + + iovec[0].iov_base = &hdr; +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index 8475c5a29d..9ee6e87e7d 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -187,7 +187,7 @@ int vhost_backend_update_device_iotlb(struct vhost_dev *dev, + int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len); + +-int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, ++uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg); + + int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0023-DAX-libvhost-user-Route-slave-message-payload.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0002-DAX-libvhost-user-Route-slave-message-payload.patch similarity index 55% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0023-DAX-libvhost-user-Route-slave-message-payload.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0002-DAX-libvhost-user-Route-slave-message-payload.patch index 59d655e000..570929e182 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0023-DAX-libvhost-user-Route-slave-message-payload.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0002-DAX-libvhost-user-Route-slave-message-payload.patch @@ -1,19 +1,19 @@ -From 55b6372e1b893e77c6c4d5e87bd1a0765126399c Mon Sep 17 00:00:00 2001 +From abd4f64c4e2a4316be87897360ce2a6f350c00e9 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 20 May 2019 20:02:29 +0100 -Subject: [PATCH 23/29] DAX: libvhost-user: Route slave message payload +Subject: [PATCH 02/25] DAX: libvhost-user: Route slave message payload Route the uint64 payload from message replies on the slave back up through vu_process_message_reply and to the callers. Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- - subprojects/libvhost-user/libvhost-user.c | 20 ++++++++++++++++---- - tools/virtiofsd/fuse_virtio.c | 2 ++ - 2 files changed, 18 insertions(+), 4 deletions(-) + subprojects/libvhost-user/libvhost-user.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c -index 9b8223b5d5..a1cbb626d2 100644 +index bf09693255..354a34ff15 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -403,9 +403,11 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) @@ -60,38 +60,6 @@ index 9b8223b5d5..a1cbb626d2 100644 } static bool -@@ -2915,6 +2923,8 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, - VhostUserFSSlaveMsg *fsm) - { - int fd_num = 0; -+ bool res; -+ uint64_t payload = 0; - VhostUserMsg vmsg = { - .request = req, - .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, -@@ -2939,6 +2949,8 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, - } - - /* Also unlocks the slave_mutex */ -- return vu_process_message_reply(dev, &vmsg); -+ res = vu_process_message_reply(dev, &vmsg, &payload); -+ res = res && (payload == 0); -+ return res; - } - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 1f4c7fff35..416d285844 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -419,6 +419,8 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - buf->buf[0].pos += msg.len[0]; - in_sg_ptr++; - bad_in_num--; -+ } else if (req_res == 0) { -+ break; - } else { - ret = EIO; - free(in_sg_cpy); -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0003-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch similarity index 84% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0003-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch index d3f2a1674b..4057d8e397 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0003-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch @@ -1,7 +1,7 @@ -From d14a6cb000d0a5f9e382e5e5de0021756034d0cb Mon Sep 17 00:00:00 2001 +From 712b35aa6f1c4954ea5b90735a2f0ef65fc105a7 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 May 2019 18:04:04 +0100 -Subject: [PATCH 01/29] DAX: libvhost-user: Allow popping a queue element with +Subject: [PATCH 03/25] DAX: libvhost-user: Allow popping a queue element with bad pointers Allow a daemon implemented with libvhost-user to accept an @@ -12,7 +12,9 @@ cases of this. The default behaviour doesn't change. Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- + block/export/vhost-user-blk-server.c | 2 +- contrib/vhost-user-blk/vhost-user-blk.c | 3 +- contrib/vhost-user-gpu/vhost-user-gpu.c | 5 ++- contrib/vhost-user-input/main.c | 4 +- @@ -21,8 +23,21 @@ Signed-off-by: Dr. David Alan Gilbert subprojects/libvhost-user/libvhost-user.h | 8 +++- tests/vhost-user-bridge.c | 4 +- tools/virtiofsd/fuse_virtio.c | 3 +- - 8 files changed, 59 insertions(+), 21 deletions(-) + 9 files changed, 60 insertions(+), 22 deletions(-) +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index 1862563336..76d9619501 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -300,7 +300,7 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) + while (1) { + VuBlkReq *req; + +- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq)); ++ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq), NULL, NULL); + if (!req) { + break; + } diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index d14b2896bf..01193552e9 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c @@ -38,10 +53,10 @@ index d14b2896bf..01193552e9 100644 return -1; } diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c -index f445ef28ec..58161a4378 100644 +index 611360e6b4..9feba0660e 100644 --- a/contrib/vhost-user-gpu/vhost-user-gpu.c +++ b/contrib/vhost-user-gpu/vhost-user-gpu.c -@@ -819,7 +819,8 @@ vg_handle_ctrl(VuDev *dev, int qidx) +@@ -863,7 +863,8 @@ vg_handle_ctrl(VuDev *dev, int qidx) return; } @@ -51,7 +66,7 @@ index f445ef28ec..58161a4378 100644 if (!cmd) { break; } -@@ -922,7 +923,7 @@ vg_handle_cursor(VuDev *dev, int qidx) +@@ -972,7 +973,7 @@ vg_handle_cursor(VuDev *dev, int qidx) struct virtio_gpu_update_cursor cursor; for (;;) { @@ -61,10 +76,10 @@ index f445ef28ec..58161a4378 100644 break; } diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c -index c15d18c33f..d5c435605c 100644 +index 081230da54..1d868409d0 100644 --- a/contrib/vhost-user-input/main.c +++ b/contrib/vhost-user-input/main.c -@@ -57,7 +57,7 @@ static void vi_input_send(VuInput *vi, struct virtio_input_event *event) +@@ -58,7 +58,7 @@ static void vi_input_send(VuInput *vi, struct virtio_input_event *event) /* ... then check available space ... */ for (i = 0; i < vi->qindex; i++) { @@ -73,7 +88,7 @@ index c15d18c33f..d5c435605c 100644 if (!elem) { while (--i >= 0) { vu_queue_unpop(dev, vq, vi->queue[i].elem, 0); -@@ -141,7 +141,7 @@ static void vi_handle_sts(VuDev *dev, int qidx) +@@ -145,7 +145,7 @@ static void vi_handle_sts(VuDev *dev, int qidx) g_debug("%s", G_STRFUNC); for (;;) { @@ -96,10 +111,10 @@ index 4f6e3e2a24..7564d6ab2d 100644 g_debug("No more elements pending on vq[%d]@%p", idx, vq); break; diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c -index fab7ca17ee..3b1b5c385f 100644 +index 354a34ff15..cedcc05414 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c -@@ -2461,7 +2461,8 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable) +@@ -2469,7 +2469,8 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable) static bool virtqueue_map_desc(VuDev *dev, @@ -109,7 +124,7 @@ index fab7ca17ee..3b1b5c385f 100644 unsigned int max_num_sg, bool is_write, uint64_t pa, size_t sz) { -@@ -2482,10 +2483,35 @@ virtqueue_map_desc(VuDev *dev, +@@ -2490,10 +2491,35 @@ virtqueue_map_desc(VuDev *dev, return false; } @@ -149,7 +164,7 @@ index fab7ca17ee..3b1b5c385f 100644 } iov[num_sg].iov_len = len; num_sg++; -@@ -2516,7 +2542,8 @@ virtqueue_alloc_element(size_t sz, +@@ -2524,7 +2550,8 @@ virtqueue_alloc_element(size_t sz, } static void * @@ -159,7 +174,7 @@ index fab7ca17ee..3b1b5c385f 100644 { struct vring_desc *desc = vq->vring.desc; uint64_t desc_addr, read_len; -@@ -2560,7 +2587,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) +@@ -2568,7 +2595,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) /* Collect all the descriptors */ do { if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) { @@ -168,7 +183,7 @@ index fab7ca17ee..3b1b5c385f 100644 VIRTQUEUE_MAX_SIZE - out_num, true, le64toh(desc[i].addr), le32toh(desc[i].len))) { -@@ -2571,7 +2598,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) +@@ -2579,7 +2606,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) vu_panic(dev, "Incorrect order for descriptors"); return NULL; } @@ -177,7 +192,7 @@ index fab7ca17ee..3b1b5c385f 100644 VIRTQUEUE_MAX_SIZE, false, le64toh(desc[i].addr), le32toh(desc[i].len))) { -@@ -2661,7 +2688,8 @@ vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx) +@@ -2669,7 +2696,8 @@ vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx) } void * @@ -187,7 +202,7 @@ index fab7ca17ee..3b1b5c385f 100644 { int i; unsigned int head; -@@ -2674,7 +2702,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) +@@ -2682,7 +2710,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) { i = (--vq->resubmit_num); @@ -197,7 +212,7 @@ index fab7ca17ee..3b1b5c385f 100644 if (!vq->resubmit_num) { free(vq->resubmit_list); -@@ -2706,7 +2735,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) +@@ -2714,7 +2743,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) vring_set_avail_event(vq, vq->last_avail_idx); } @@ -207,7 +222,7 @@ index fab7ca17ee..3b1b5c385f 100644 if (!elem) { return NULL; diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h -index 7d47f1364a..f0aca2b216 100644 +index 3d13dfadde..330b61c005 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -589,11 +589,17 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq); @@ -252,10 +267,10 @@ index 24815920b2..4f6829e6c3 100644 break; } diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index ddcefee427..bd19358437 100644 +index fc2564a603..3d54f01955 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c -@@ -657,7 +657,8 @@ static void *fv_queue_thread(void *opaque) +@@ -697,7 +697,8 @@ static void *fv_queue_thread(void *opaque) __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); while (1) { @@ -266,5 +281,5 @@ index ddcefee427..bd19358437 100644 break; } -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0004-DAX-subprojects-libvhost-user-Add-virtio-fs-slave-ty.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0004-DAX-subprojects-libvhost-user-Add-virtio-fs-slave-ty.patch new file mode 100644 index 0000000000..5a142217a0 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0004-DAX-subprojects-libvhost-user-Add-virtio-fs-slave-ty.patch @@ -0,0 +1,151 @@ +From e1b00c1e5425e5b13efa772b89fa5b25b6f07709 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 7 Feb 2019 18:39:31 +0000 +Subject: [PATCH 04/25] DAX subprojects/libvhost-user: Add virtio-fs slave + types + +Add virtio-fs definitions to libvhost-user + +Signed-off-by: Dr. David Alan Gilbert +--- + subprojects/libvhost-user/libvhost-user.c | 47 +++++++++++++++++++ + subprojects/libvhost-user/libvhost-user.h | 56 +++++++++++++++++++++++ + 2 files changed, 103 insertions(+) + +diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c +index cedcc05414..e7a6a8ed06 100644 +--- a/subprojects/libvhost-user/libvhost-user.c ++++ b/subprojects/libvhost-user/libvhost-user.c +@@ -2918,3 +2918,50 @@ vu_queue_push(VuDev *dev, VuVirtq *vq, + vu_queue_flush(dev, vq, 1); + vu_queue_inflight_post_put(dev, vq, elem->index); + } ++ ++int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, ++ VhostUserFSSlaveMsg *fsm) ++{ ++ int fd_num = 0; ++ bool res; ++ uint64_t payload = 0; ++ VhostUserMsg vmsg = { ++ .request = req, ++ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, ++ }; ++ ++ if (fsm->hdr.count > VHOST_USER_FS_SLAVE_MAX_ENTRIES) { ++ return -EINVAL; ++ } ++ ++ vmsg.size = sizeof(VhostUserFSSlaveMsg) + ++ fsm->hdr.count * sizeof(VhostUserFSSlaveMsgEntry); ++ memcpy(&vmsg.payload.fs_max, fsm, vmsg.size); ++ ++ if (fd != -1) { ++ vmsg.fds[fd_num++] = fd; ++ } ++ ++ vmsg.fd_num = fd_num; ++ ++ if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { ++ return -EINVAL; ++ } ++ ++ pthread_mutex_lock(&dev->slave_mutex); ++ if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { ++ pthread_mutex_unlock(&dev->slave_mutex); ++ return -EIO; ++ } ++ ++ /* Also unlocks the slave_mutex */ ++ res = vu_process_message_reply(dev, &vmsg, &payload); ++ if (!res) { ++ return -EIO; ++ } ++ /* ++ * Payload is delivered as uint64_t but is actually signed for ++ * errors. ++ */ ++ return (int64_t)payload; ++} +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index 330b61c005..d7511b80a3 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -122,6 +122,49 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + ++/* Structures carried over the slave channel back to QEMU */ ++#define VHOST_USER_FS_SLAVE_MAX_ENTRIES 32 ++ ++/* For the flags field of VhostUserFSSlaveMsg */ ++#define VHOST_USER_FS_FLAG_MAP_R (1u << 0) ++#define VHOST_USER_FS_FLAG_MAP_W (1u << 1) ++ ++typedef struct { ++ /* Offsets within the file being mapped */ ++ uint64_t fd_offset; ++ /* Offsets within the cache */ ++ uint64_t c_offset; ++ /* Lengths of sections */ ++ uint64_t len; ++ /* Flags, from VHOST_USER_FS_FLAG_* */ ++ uint64_t flags; ++} VhostUserFSSlaveMsgEntry; ++ ++typedef struct { ++ /* Spare */ ++ uint32_t align32; ++ /* Number of entries */ ++ uint16_t count; ++ /* Spare */ ++ uint16_t align16; ++} VhostUserFSSlaveMsgHdr; ++ ++/* ++ * This is really a structure with a variable number of entries, ++ * but we want to avoid a variable length array in the union, ++ * so have one version with the variable length array ++ * for places where we have the partial allocation. ++ */ ++typedef struct { ++ VhostUserFSSlaveMsgHdr hdr; ++ VhostUserFSSlaveMsgEntry entries[]; ++} VhostUserFSSlaveMsg; ++ ++typedef struct { ++ VhostUserFSSlaveMsgHdr hdr; ++ VhostUserFSSlaveMsgEntry entries[VHOST_USER_FS_SLAVE_MAX_ENTRIES]; ++} VhostUserFSSlaveMsgMax; ++ + typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; +@@ -197,6 +240,7 @@ typedef struct VhostUserMsg { + VhostUserConfig config; + VhostUserVringArea area; + VhostUserInflight inflight; ++ VhostUserFSSlaveMsgMax fs_max; + } payload; + + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; +@@ -693,4 +737,16 @@ void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, + bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, + unsigned int out_bytes); + ++/** ++ * vu_fs_cache_request: Send a slave message for an fs client ++ * @dev: a VuDev context ++ * @req: The request type (map, unmap, sync) ++ * @fd: an fd (only required for map, else must be -1) ++ * @fsm: The body of the message ++ * ++ * Returns: 0 or above for success, nevative errno on error ++ */ ++int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, ++ VhostUserFSSlaveMsg *fsm); ++ + #endif /* LIBVHOST_USER_H */ +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0004-DAX-virtio-Add-shared-memory-capability.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0005-DAX-virtio-Add-shared-memory-capability.patch similarity index 83% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0004-DAX-virtio-Add-shared-memory-capability.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0005-DAX-virtio-Add-shared-memory-capability.patch index 86e14b1132..1bf17b8017 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0004-DAX-virtio-Add-shared-memory-capability.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0005-DAX-virtio-Add-shared-memory-capability.patch @@ -1,7 +1,7 @@ -From 71c89288b97c92ecb3a67ca8aa73619719dcfe9e Mon Sep 17 00:00:00 2001 +From b60b45a716a3dafa66e8c7a0bf9a9dec027bab1a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 27 Jul 2018 12:38:03 +0100 -Subject: [PATCH 04/29] DAX: virtio: Add shared memory capability +Subject: [PATCH 05/25] DAX: virtio: Add shared memory capability Define a new capability type 'VIRTIO_PCI_CAP_SHARED_MEMORY_CFG' and the data structure 'virtio_pci_cap64' to go with it. @@ -11,16 +11,17 @@ Multiple instances of the capability are allowed and distinguished by the 'id' field in the base capability. Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- hw/virtio/virtio-pci.c | 20 ++++++++++++++++++++ hw/virtio/virtio-pci.h | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index f863f69ede..f17ea5a6e8 100644 +index 433060ac02..566f4bc13d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c -@@ -1136,6 +1136,26 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, +@@ -1149,6 +1149,26 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, return offset; } @@ -48,10 +49,10 @@ index f863f69ede..f17ea5a6e8 100644 unsigned size) { diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h -index d7d5d403a9..31ca339099 100644 +index 2446dcd9ae..5e5c4a4c6d 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h -@@ -247,4 +247,8 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); +@@ -252,4 +252,8 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); */ unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); @@ -61,5 +62,5 @@ index d7d5d403a9..31ca339099 100644 + #endif -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0005-DAX-virtio-fs-Add-cache-BAR.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0006-DAX-virtio-fs-Add-cache-BAR.patch similarity index 66% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0005-DAX-virtio-fs-Add-cache-BAR.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0006-DAX-virtio-fs-Add-cache-BAR.patch index 4d03f5b033..191d181d48 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0005-DAX-virtio-fs-Add-cache-BAR.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0006-DAX-virtio-fs-Add-cache-BAR.patch @@ -1,7 +1,7 @@ -From 3996e9086ddd591494f9cb7f0eb7048a1b52200c Mon Sep 17 00:00:00 2001 +From a7a7e96dbf94ff8abcabd6165ce5054f84110e5e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 3 Jul 2018 16:33:52 +0100 -Subject: [PATCH 05/29] DAX: virtio-fs: Add cache BAR +Subject: [PATCH 06/25] DAX: virtio-fs: Add cache BAR Add a cache BAR into which files will be directly mapped. The size can be set with the cache-size= property, e.g. @@ -12,22 +12,29 @@ The default is no cache. Signed-off-by: Dr. David Alan Gilbert with PPC fixes by: Signed-off-by: Fabiano Rosas +Reviewed-by: Stefan Hajnoczi --- - hw/virtio/vhost-user-fs-pci.c | 23 ++++++++++++++++ - hw/virtio/vhost-user-fs.c | 32 ++++++++++++++++++++++ - include/hw/virtio/vhost-user-fs.h | 2 ++ - include/standard-headers/linux/virtio_fs.h | 2 ++ - 4 files changed, 59 insertions(+) + hw/virtio/vhost-user-fs-pci.c | 36 +++++++++++++++++++++++++++++++ + hw/virtio/vhost-user-fs.c | 32 +++++++++++++++++++++++++++ + include/hw/virtio/vhost-user-fs.h | 2 ++ + 3 files changed, 70 insertions(+) diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c -index 8bb389bd28..19aaa8d722 100644 +index 2ed8492b3f..40816f8dfd 100644 --- a/hw/virtio/vhost-user-fs-pci.c +++ b/hw/virtio/vhost-user-fs-pci.c -@@ -16,10 +16,12 @@ +@@ -12,14 +12,19 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/error.h" + #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-fs.h" #include "virtio-pci.h" #include "qom/object.h" +#include "standard-headers/linux/virtio_fs.h" ++ ++#define VIRTIO_FS_PCI_CACHE_BAR 2 struct VHostUserFSPCI { VirtIOPCIProxy parent_obj; @@ -36,48 +43,59 @@ index 8bb389bd28..19aaa8d722 100644 }; typedef struct VHostUserFSPCI VHostUserFSPCI; -@@ -39,6 +41,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +@@ -38,7 +43,9 @@ static Property vhost_user_fs_pci_properties[] = { + static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VHostUserFSPCI *dev = VHOST_USER_FS_PCI(vpci_dev); ++ bool modern_pio = vpci_dev->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; DeviceState *vdev = DEVICE(&dev->vdev); + uint64_t cachesize; if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { /* Also reserve config change and hiprio queue vectors */ -@@ -46,6 +49,26 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +@@ -46,6 +53,35 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) } qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + cachesize = dev->vdev.conf.cache_size; + ++ if (cachesize && modern_pio) { ++ /* ++ * We've not got enough BARs for the one used by the DAX cache ++ * and also the one used by modern_pio ++ */ ++ error_setg(errp, "DAX Cache can not be used together with modern_pio"); ++ return; ++ } ++ + /* + * The bar starts with the data/DAX cache + * Others will be added later. + */ + memory_region_init(&dev->cachebar, OBJECT(vpci_dev), -+ "vhost-fs-pci-cachebar", cachesize); ++ "vhost-user-fs-pci-cachebar", cachesize); + if (cachesize) { + memory_region_add_subregion(&dev->cachebar, 0, &dev->vdev.cache); + virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, 0, cachesize, + VIRTIO_FS_SHMCAP_ID_CACHE); -+ } + -+ /* After 'realized' so the memory region exists */ -+ pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR, -+ PCI_BASE_ADDRESS_SPACE_MEMORY | -+ PCI_BASE_ADDRESS_MEM_PREFETCH | -+ PCI_BASE_ADDRESS_MEM_TYPE_64, -+ &dev->cachebar); ++ /* After 'realized' so the memory region exists */ ++ pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR, ++ PCI_BASE_ADDRESS_SPACE_MEMORY | ++ PCI_BASE_ADDRESS_MEM_PREFETCH | ++ PCI_BASE_ADDRESS_MEM_TYPE_64, ++ &dev->cachebar); ++ } } static void vhost_user_fs_pci_class_init(ObjectClass *klass, void *data) diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index ed036ad9c1..d111bf2af3 100644 +index c595957983..249a5032ef 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c -@@ -23,6 +23,16 @@ - #include "hw/virtio/vhost-user-fs.h" - #include "monitor/monitor.h" +@@ -35,6 +35,16 @@ static const int user_feature_bits[] = { + VHOST_INVALID_FEATURE_BIT + }; +/* + * The powerpc kernel code expects the memory to be accessible during @@ -92,7 +110,7 @@ index ed036ad9c1..d111bf2af3 100644 static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) { VHostUserFS *fs = VHOST_USER_FS(vdev); -@@ -162,6 +172,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) +@@ -175,6 +185,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserFS *fs = VHOST_USER_FS(dev); @@ -100,13 +118,13 @@ index ed036ad9c1..d111bf2af3 100644 unsigned int i; size_t len; int ret; -@@ -201,6 +212,26 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) +@@ -214,6 +225,26 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) VIRTQUEUE_MAX_SIZE); return; } + if (fs->conf.cache_size && + (!is_power_of_2(fs->conf.cache_size) || -+ fs->conf.cache_size < sysconf(_SC_PAGESIZE))) { ++ fs->conf.cache_size < qemu_real_host_page_size)) { + error_setg(errp, "cache-size property must be a power of 2 " + "no smaller than the page size"); + return; @@ -120,14 +138,14 @@ index ed036ad9c1..d111bf2af3 100644 + return; + } + -+ memory_region_init_ram_ptr(&fs->cache, OBJECT(vdev), ++ memory_region_init_ram_device_ptr(&fs->cache, OBJECT(vdev), + "virtio-fs-cache", + fs->conf.cache_size, cache_ptr); + } if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) { return; -@@ -276,6 +307,7 @@ static Property vuf_properties[] = { +@@ -288,6 +319,7 @@ static Property vuf_properties[] = { DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, conf.num_request_queues, 1), DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), @@ -136,7 +154,7 @@ index ed036ad9c1..d111bf2af3 100644 }; diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 6985752771..df6bf2a926 100644 +index 0d62834c25..04596799e3 100644 --- a/include/hw/virtio/vhost-user-fs.h +++ b/include/hw/virtio/vhost-user-fs.h @@ -28,6 +28,7 @@ typedef struct { @@ -147,25 +165,14 @@ index 6985752771..df6bf2a926 100644 } VHostUserFSConf; struct VHostUserFS { -@@ -41,6 +42,7 @@ struct VHostUserFS { - VirtQueue *hiprio_vq; +@@ -42,6 +43,7 @@ struct VHostUserFS { + int32_t bootindex; /*< public >*/ + MemoryRegion cache; }; #endif /* _QEMU_VHOST_USER_FS_H */ -diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h -index a32fe8a64c..808aa3a402 100644 ---- a/include/standard-headers/linux/virtio_fs.h -+++ b/include/standard-headers/linux/virtio_fs.h -@@ -19,4 +19,6 @@ struct virtio_fs_config { - /* For the id field in virtio_pci_shm_cap */ - #define VIRTIO_FS_SHMCAP_ID_CACHE 0 - -+#define VIRTIO_FS_PCI_CACHE_BAR 2 -+ - #endif /* _LINUX_VIRTIO_FS_H */ -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0007-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0007-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch new file mode 100644 index 0000000000..5875805476 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0007-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch @@ -0,0 +1,257 @@ +From 80b95774eb85996ffe389d286f944267df2d5780 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 4 Jul 2018 18:51:42 +0100 +Subject: [PATCH 07/25] DAX: virtio-fs: Add vhost-user slave commands for + mapping + +The daemon may request that fd's be mapped into the virtio-fs cache +visible to the guest. +These mappings are triggered by commands sent over the slave fd +from the daemon. + +Signed-off-by: Dr. David Alan Gilbert +--- + docs/interop/vhost-user.rst | 21 ++++++++ + hw/virtio/vhost-user-fs.c | 66 +++++++++++++++++++++++ + hw/virtio/vhost-user.c | 15 ++++++ + include/hw/virtio/vhost-user-fs.h | 49 +++++++++++++++++ + subprojects/libvhost-user/libvhost-user.h | 2 + + 5 files changed, 153 insertions(+) + +diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst +index edc3ad84a3..3e0dfe50d0 100644 +--- a/docs/interop/vhost-user.rst ++++ b/docs/interop/vhost-user.rst +@@ -1436,6 +1436,27 @@ Slave message types + + The state.num field is currently reserved and must be set to 0. + ++``VHOST_USER_SLAVE_FS_MAP`` ++ :id: 6 ++ :equivalent ioctl: N/A ++ :slave payload: ``struct VhostUserFSSlaveMsg`` ++ :master payload: N/A ++ ++ Requests that an fd, provided in the ancillary data, be mmapped ++ into the virtio-fs cache; multiple chunks can be mapped in one ++ command. ++ A reply is generated indicating whether mapping succeeded. ++ ++``VHOST_USER_SLAVE_FS_UNMAP`` ++ :id: 7 ++ :equivalent ioctl: N/A ++ :slave payload: ``struct VhostUserFSSlaveMsg`` ++ :master payload: N/A ++ ++ Requests that the range in the virtio-fs cache is unmapped; ++ multiple chunks can be unmapped in one command. ++ A reply is generated indicating whether unmapping succeeded. ++ + .. _reply_ack: + + VHOST_USER_PROTOCOL_F_REPLY_ACK +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index 249a5032ef..f724ea3561 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -45,6 +45,72 @@ static const int user_feature_bits[] = { + #define DAX_WINDOW_PROT PROT_NONE + #endif + ++/* ++ * The message apparently had 'received_size' bytes, check this ++ * matches the count in the message. ++ * ++ * Returns true if the size matches. ++ */ ++static bool check_slave_message_entries(const VhostUserFSSlaveMsg *sm, ++ size_t received_size) ++{ ++ size_t tmp; ++ ++ /* ++ * VhostUserFSSlaveMsg consists of a body followed by 'n' entries, ++ * (each VhostUserFSSlaveMsgEntry). There's a maximum of ++ * VHOST_USER_FS_SLAVE_MAX_ENTRIES of these. ++ */ ++ if (received_size <= sizeof(VhostUserFSSlaveMsg)) { ++ error_report("%s: Short VhostUserFSSlaveMsg size, %zd", __func__, ++ received_size); ++ return false; ++ } ++ ++ tmp = received_size - sizeof(VhostUserFSSlaveMsg); ++ if (tmp % sizeof(VhostUserFSSlaveMsgEntry)) { ++ error_report("%s: Non-multiple VhostUserFSSlaveMsg size, %zd", __func__, ++ received_size); ++ return false; ++ } ++ ++ tmp /= sizeof(VhostUserFSSlaveMsgEntry); ++ if (tmp != sm->hdr.count) { ++ error_report("%s: VhostUserFSSlaveMsg count mismatch, %zd count: %d", ++ __func__, tmp, sm->hdr.count); ++ return false; ++ } ++ ++ if (sm->hdr.count > VHOST_USER_FS_SLAVE_MAX_ENTRIES) { ++ error_report("%s: VhostUserFSSlaveMsg too many entries: %d", ++ __func__, sm->hdr.count); ++ return false; ++ } ++ return true; ++} ++ ++uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, size_t message_size, ++ VhostUserFSSlaveMsg *sm, int fd) ++{ ++ if (!check_slave_message_entries(sm, message_size)) { ++ return (uint64_t)-1; ++ } ++ ++ /* TODO */ ++ return (uint64_t)-1; ++} ++ ++uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, size_t message_size, ++ VhostUserFSSlaveMsg *sm) ++{ ++ if (!check_slave_message_entries(sm, message_size)) { ++ return (uint64_t)-1; ++ } ++ ++ /* TODO */ ++ return (uint64_t)-1; ++} ++ + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index cdbdb5b469..60f769b1b7 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -12,6 +12,7 @@ + #include "qapi/error.h" + #include "hw/virtio/vhost.h" + #include "hw/virtio/vhost-user.h" ++#include "hw/virtio/vhost-user-fs.h" + #include "hw/virtio/vhost-backend.h" + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-net.h" +@@ -133,6 +134,10 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, ++ VHOST_USER_SLAVE_VRING_CALL = 4, ++ VHOST_USER_SLAVE_VRING_ERR = 5, ++ VHOST_USER_SLAVE_FS_MAP = 6, ++ VHOST_USER_SLAVE_FS_UNMAP = 7, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +@@ -219,6 +224,7 @@ typedef union { + VhostUserCryptoSession session; + VhostUserVringArea area; + VhostUserInflight inflight; ++ VhostUserFSSlaveMsgMax fs_max; + } VhostUserPayload; + + typedef struct VhostUserMsg { +@@ -1542,6 +1548,15 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, + fd ? fd[0] : -1); + break; ++#ifdef CONFIG_VHOST_USER_FS ++ case VHOST_USER_SLAVE_FS_MAP: ++ ret = vhost_user_fs_slave_map(dev, hdr.size, &payload.fs, ++ fd ? fd[0] : -1); ++ break; ++ case VHOST_USER_SLAVE_FS_UNMAP: ++ ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs); ++ break; ++#endif + default: + error_report("Received unexpected msg type: %d.", hdr.request); + ret = EINVAL; +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 04596799e3..1edb328c40 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -23,6 +23,49 @@ + #define TYPE_VHOST_USER_FS "vhost-user-fs-device" + OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS) + ++/* Structures carried over the slave channel back to QEMU */ ++#define VHOST_USER_FS_SLAVE_MAX_ENTRIES 32 ++ ++/* For the flags field of VhostUserFSSlaveMsg */ ++#define VHOST_USER_FS_FLAG_MAP_R (1u << 0) ++#define VHOST_USER_FS_FLAG_MAP_W (1u << 1) ++ ++typedef struct { ++ /* Offsets within the file being mapped */ ++ uint64_t fd_offset; ++ /* Offsets within the cache */ ++ uint64_t c_offset; ++ /* Lengths of sections */ ++ uint64_t len; ++ /* Flags, from VHOST_USER_FS_FLAG_* */ ++ uint64_t flags; ++} VhostUserFSSlaveMsgEntry; ++ ++typedef struct { ++ /* Spare */ ++ uint32_t align32; ++ /* Number of entries */ ++ uint16_t count; ++ /* Spare */ ++ uint16_t align16; ++} VhostUserFSSlaveMsgHdr; ++ ++/* ++ * This is really a structure with a variable number of entries, ++ * but we want to avoid a variable length array in the union, ++ * so have one version with the variable length array ++ * for places where we have the partial allocation. ++ */ ++typedef struct { ++ VhostUserFSSlaveMsgHdr hdr; ++ VhostUserFSSlaveMsgEntry entries[]; ++} VhostUserFSSlaveMsg; ++ ++typedef struct { ++ VhostUserFSSlaveMsgHdr hdr; ++ VhostUserFSSlaveMsgEntry entries[VHOST_USER_FS_SLAVE_MAX_ENTRIES]; ++} VhostUserFSSlaveMsgMax; ++ + typedef struct { + CharBackend chardev; + char *tag; +@@ -46,4 +89,10 @@ struct VHostUserFS { + MemoryRegion cache; + }; + ++/* Callbacks from the vhost-user code for slave commands */ ++uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, size_t message_size, ++ VhostUserFSSlaveMsg *sm, int fd); ++uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, size_t message_size, ++ VhostUserFSSlaveMsg *sm); ++ + #endif /* _QEMU_VHOST_USER_FS_H */ +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index d7511b80a3..8b50918041 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -119,6 +119,8 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, + VHOST_USER_SLAVE_VRING_ERR = 5, ++ VHOST_USER_SLAVE_FS_MAP = 6, ++ VHOST_USER_SLAVE_FS_UNMAP = 7, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0008-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0008-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch new file mode 100644 index 0000000000..34021bde4f --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0008-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch @@ -0,0 +1,158 @@ +From 6875d5e80530ea2cd81661385a24a3e04dd369a6 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 4 Jul 2018 20:01:51 +0100 +Subject: [PATCH 08/25] DAX: virtio-fs: Fill in slave commands for mapping + +Fill in definitions for map, unmap and sync commands. + +Signed-off-by: Dr. David Alan Gilbert +with fix by misono.tomohiro@fujitsu.com +Reviewed-by: Stefan Hajnoczi +--- + hw/virtio/vhost-user-fs.c | 117 ++++++++++++++++++++++++++++++++++++-- + 1 file changed, 113 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index f724ea3561..60ae21b7bc 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -92,23 +92,132 @@ static bool check_slave_message_entries(const VhostUserFSSlaveMsg *sm, + uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, size_t message_size, + VhostUserFSSlaveMsg *sm, int fd) + { ++ VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev), ++ TYPE_VHOST_USER_FS); ++ if (!fs) { ++ error_report("%s: Bad fs ptr", __func__); ++ return (uint64_t)-1; ++ } + if (!check_slave_message_entries(sm, message_size)) { + return (uint64_t)-1; + } + +- /* TODO */ +- return (uint64_t)-1; ++ size_t cache_size = fs->conf.cache_size; ++ if (!cache_size) { ++ error_report("map called when DAX cache not present"); ++ return (uint64_t)-1; ++ } ++ void *cache_host = memory_region_get_ram_ptr(&fs->cache); ++ ++ unsigned int i; ++ int res = 0; ++ ++ if (fd < 0) { ++ error_report("Bad fd for map"); ++ return (uint64_t)-1; ++ } ++ ++ for (i = 0; i < sm->hdr.count; i++) { ++ VhostUserFSSlaveMsgEntry *e = &sm->entries[i]; ++ if (e->len == 0) { ++ continue; ++ } ++ ++ if ((e->c_offset + e->len) < e->len || ++ (e->c_offset + e->len) > cache_size) { ++ error_report("Bad offset/len for map [%d] %" PRIx64 "+%" PRIx64, ++ i, e->c_offset, e->len); ++ res = -1; ++ break; ++ } ++ ++ if (mmap(cache_host + e->c_offset, e->len, ++ ((e->flags & VHOST_USER_FS_FLAG_MAP_R) ? PROT_READ : 0) | ++ ((e->flags & VHOST_USER_FS_FLAG_MAP_W) ? PROT_WRITE : 0), ++ MAP_SHARED | MAP_FIXED, ++ fd, e->fd_offset) != (cache_host + e->c_offset)) { ++ res = -errno; ++ error_report("map failed err %d [%d] %" PRIx64 "+%" PRIx64 " from %" ++ PRIx64, errno, i, e->c_offset, e->len, ++ e->fd_offset); ++ break; ++ } ++ } ++ ++ if (res) { ++ /* Something went wrong, unmap them all */ ++ vhost_user_fs_slave_unmap(dev, message_size, sm); ++ } ++ return (uint64_t)res; + } + + uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, size_t message_size, + VhostUserFSSlaveMsg *sm) + { ++ VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev), ++ TYPE_VHOST_USER_FS); ++ if (!fs) { ++ error_report("%s: Bad fs ptr", __func__); ++ return (uint64_t)-1; ++ } + if (!check_slave_message_entries(sm, message_size)) { + return (uint64_t)-1; + } + +- /* TODO */ +- return (uint64_t)-1; ++ size_t cache_size = fs->conf.cache_size; ++ if (!cache_size) { ++ /* ++ * Since dax cache is disabled, there should be no unmap request. ++ * Howerver we still receives whole range unmap request during umount ++ * for cleanup. Ignore it. ++ */ ++ if (sm->entries[0].len == ~(uint64_t)0) { ++ return 0; ++ } ++ ++ error_report("unmap called when DAX cache not present"); ++ return (uint64_t)-1; ++ } ++ void *cache_host = memory_region_get_ram_ptr(&fs->cache); ++ ++ unsigned int i; ++ int res = 0; ++ ++ /* ++ * Note even if one unmap fails we try the rest, since the effect ++ * is to clean up as much as possible. ++ */ ++ for (i = 0; i < sm->hdr.count; i++) { ++ VhostUserFSSlaveMsgEntry *e = &sm->entries[i]; ++ void *ptr; ++ if (e->len == 0) { ++ continue; ++ } ++ ++ if (e->len == ~(uint64_t)0) { ++ /* Special case meaning the whole arena */ ++ e->len = cache_size; ++ } ++ ++ if ((e->c_offset + e->len) < e->len || ++ (e->c_offset + e->len) > cache_size) { ++ error_report("Bad offset/len for unmap [%d] %" PRIx64 "+%" PRIx64, ++ i, e->c_offset, e->len); ++ res = -1; ++ continue; ++ } ++ ++ ptr = mmap(cache_host + e->c_offset, e->len, DAX_WINDOW_PROT, ++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); ++ if (ptr != (cache_host + e->c_offset)) { ++ res = -errno; ++ error_report("mmap failed (%s) [%d] %" PRIx64 "+%" PRIx64 " from %" ++ PRIx64 " res: %p", strerror(errno), i, e->c_offset, ++ e->len, e->fd_offset, ptr); ++ } ++ } ++ ++ return (uint64_t)res; + } + + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0009-DAX-virtiofsd-Add-cache-accessor-functions.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0009-DAX-virtiofsd-Add-cache-accessor-functions.patch new file mode 100644 index 0000000000..ebfc96c291 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0009-DAX-virtiofsd-Add-cache-accessor-functions.patch @@ -0,0 +1,81 @@ +From e671b4ae1189baef10898d76f975c8c3a9e36b6c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 5 Jul 2018 18:20:34 +0100 +Subject: [PATCH 09/25] DAX: virtiofsd Add cache accessor functions + +Add low level functions that the clients can use to map/unmap cache +areas. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi +--- + tools/virtiofsd/fuse_lowlevel.h | 21 +++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.c | 18 ++++++++++++++++++ + 2 files changed, 39 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 4b4e8c9724..29e26c556d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -29,6 +29,8 @@ + #include + #include + ++#include "subprojects/libvhost-user/libvhost-user.h" ++ + /* + * Miscellaneous definitions + */ +@@ -1972,4 +1974,23 @@ void fuse_session_process_buf(struct fuse_session *se, + */ + int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + ++/** ++ * For use with virtio-fs; request an fd be mapped into the cache ++ * ++ * @param req The request that triggered this action ++ * @param msg A set of mapping requests ++ * @param fd The fd to map ++ * @return Zero on success ++ */ ++int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); ++ ++/** ++ * For use with virtio-fs; request unmapping of part of the cache ++ * ++ * @param se The session this request is on ++ * @param msg A set of unmapping requests ++ * @return Zero on success ++ */ ++int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); ++ + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 3d54f01955..89e923dd26 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -1081,3 +1081,21 @@ void virtio_session_close(struct fuse_session *se) + g_free(se->virtio_dev); + se->virtio_dev = NULL; + } ++ ++int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd) ++{ ++ if (!req->se->virtio_dev) { ++ return -ENODEV; ++ } ++ return vu_fs_cache_request(&req->se->virtio_dev->dev, ++ VHOST_USER_SLAVE_FS_MAP, fd, msg); ++} ++ ++int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) ++{ ++ if (!se->virtio_dev) { ++ return -ENODEV; ++ } ++ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, ++ -1, msg); ++} +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0009-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0010-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch similarity index 79% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0009-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0010-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch index 8b21a9e58e..4619f722b4 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0009-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0010-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch @@ -1,24 +1,30 @@ -From c3273cefbec6f5637189ad1cb9a8b7722cc01294 Mon Sep 17 00:00:00 2001 +From 2df0c7ed2f89d3e085ed1abb592a72b4ab66b753 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 6 Jul 2018 18:03:49 +0100 -Subject: [PATCH 09/29] DAX: virtiofsd: Add setup/remove mappings fuse commands +Subject: [PATCH 10/25] DAX: virtiofsd: Add setup/remove mappings fuse commands Add commands so that the guest kernel can ask the daemon to map file sections into a guest kernel visible cache. +Note: Catherine Ho had sent a patch to fix an issue with multiple +removemapping. It was a merge issue though. + Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal Signed-off-by: Peng Tao +Including-fixes: Catherine Ho +Signed-off-by: Catherine Ho +Reviewed-by: Stefan Hajnoczi --- - tools/virtiofsd/fuse_lowlevel.c | 67 +++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_lowlevel.c | 69 +++++++++++++++++++++++++++++++++ tools/virtiofsd/fuse_lowlevel.h | 23 ++++++++++- - 2 files changed, 89 insertions(+), 1 deletion(-) + 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index e94b71110b..1c3790130a 100644 +index e4679c73ab..7a0fc80f2f 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1868,6 +1868,71 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, +@@ -1876,6 +1876,73 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, } } @@ -65,16 +71,18 @@ index e94b71110b..1c3790130a 100644 + struct fuse_removemapping_one *one; + + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { ++ if (!arg || !arg->count || ++ (uint64_t)arg->count * sizeof(*one) >= SIZE_MAX) { ++ fuse_log(FUSE_LOG_ERR, "do_removemapping: invalid arg %p\n", arg); + fuse_reply_err(req, EINVAL); + return; + } + -+ one = fuse_mbuf_iter_advance(iter, sizeof(*one)); ++ one = fuse_mbuf_iter_advance(iter, arg->count * sizeof(*one)); + if (!one) { + fuse_log( + FUSE_LOG_ERR, -+ "do_removemapping: invalid in, expected %d * %ld, has %ld - %ld\n", ++ "do_removemapping: invalid in, expected %d * %zd, has %zd - %zd\n", + arg->count, sizeof(*one), iter->size, iter->pos); + fuse_reply_err(req, EINVAL); + return; @@ -90,7 +98,7 @@ index e94b71110b..1c3790130a 100644 static void do_init(fuse_req_t req, fuse_ino_t nodeid, struct fuse_mbuf_iter *iter) { -@@ -2258,6 +2323,8 @@ static struct { +@@ -2280,6 +2347,8 @@ static struct { [FUSE_RENAME2] = { do_rename2, "RENAME2" }, [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, [FUSE_LSEEK] = { do_lseek, "LSEEK" }, @@ -100,7 +108,7 @@ index e94b71110b..1c3790130a 100644 #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index b0d111bcb2..2851840cc2 100644 +index 29e26c556d..f628bbe475 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -24,6 +24,7 @@ @@ -111,7 +119,7 @@ index b0d111bcb2..2851840cc2 100644 #include #include -@@ -1170,7 +1171,6 @@ struct fuse_lowlevel_ops { +@@ -1172,7 +1173,6 @@ struct fuse_lowlevel_ops { */ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi); @@ -119,7 +127,7 @@ index b0d111bcb2..2851840cc2 100644 /** * Copy a range of data from one file to another * -@@ -1226,6 +1226,27 @@ struct fuse_lowlevel_ops { +@@ -1228,6 +1228,27 @@ struct fuse_lowlevel_ops { */ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, struct fuse_file_info *fi); @@ -148,5 +156,5 @@ index b0d111bcb2..2851840cc2 100644 /** -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0010-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0011-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch similarity index 77% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0010-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0011-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch index 66837dbf60..d6639187ed 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0010-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0011-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch @@ -1,19 +1,20 @@ -From 7029506e6b23fc15f2b7c4a6a62aa3a0ee58fb02 Mon Sep 17 00:00:00 2001 +From 2c4291ea2640da92e8e5033f1077c303781e162e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 6 Jul 2018 19:52:49 +0100 -Subject: [PATCH 10/29] DAX: virtiofsd: Add setup/remove mapping handlers to +Subject: [PATCH 11/25] DAX: virtiofsd: Add setup/remove mapping handlers to passthrough_ll Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- tools/virtiofsd/passthrough_ll.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5fb36d9407..784bdcff34 100644 +index 38b2af8599..63ec2e5030 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c -@@ -2891,6 +2891,22 @@ static void lo_destroy(void *userdata) +@@ -3341,6 +3341,22 @@ static void lo_destroy(void *userdata) pthread_mutex_unlock(&lo->mutex); } @@ -21,7 +22,7 @@ index 5fb36d9407..784bdcff34 100644 + uint64_t len, uint64_t moffset, uint64_t flags, + struct fuse_file_info *fi) +{ -+ // TODO ++ /* TODO */ + fuse_reply_err(req, ENOSYS); +} + @@ -29,14 +30,14 @@ index 5fb36d9407..784bdcff34 100644 + fuse_ino_t ino, unsigned num, + struct fuse_removemapping_one *argp) +{ -+ // TODO ++ /* TODO */ + fuse_reply_err(req, ENOSYS); +} + static struct fuse_lowlevel_ops lo_oper = { .init = lo_init, .lookup = lo_lookup, -@@ -2932,6 +2948,8 @@ static struct fuse_lowlevel_ops lo_oper = { +@@ -3382,6 +3398,8 @@ static struct fuse_lowlevel_ops lo_oper = { #endif .lseek = lo_lseek, .destroy = lo_destroy, @@ -46,5 +47,5 @@ index 5fb36d9407..784bdcff34 100644 /* Print vhost-user.json backend program capabilities */ -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0013-DAX-virtiofsd-Make-setupmapping-work-only-with-inode.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0012-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch similarity index 53% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0013-DAX-virtiofsd-Make-setupmapping-work-only-with-inode.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0012-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch index 27308cc641..1180eacbd8 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0013-DAX-virtiofsd-Make-setupmapping-work-only-with-inode.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0012-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch @@ -1,24 +1,30 @@ -From a3f692a36307054148e7db640dc7a64158a98250 Mon Sep 17 00:00:00 2001 -From: Vivek Goyal -Date: Thu, 30 Aug 2018 14:22:10 -0400 -Subject: [PATCH 13/29] DAX: virtiofsd: Make setupmapping work only with inode +From 57d8737e1d19a8551653a309da0d7475046a7fb5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 9 Jul 2018 19:57:16 +0100 +Subject: [PATCH 12/25] DAX: virtiofsd: Wire up passthrough_ll's + lo_setupmapping + +Wire up passthrough_ll's setupmapping to allocate, send to virtio +and then reply OK. Guest might not pass file pointer. In that case using inode info, open the file again, mmap() and close fd. +Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal With fix from: Signed-off-by: Fotis Xenakis +Reviewed-by: Stefan Hajnoczi --- - tools/virtiofsd/fuse_lowlevel.c | 13 ++++++++++-- - tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++++++++---- - 2 files changed, 43 insertions(+), 6 deletions(-) + tools/virtiofsd/fuse_lowlevel.c | 13 ++++++-- + tools/virtiofsd/passthrough_ll.c | 57 ++++++++++++++++++++++++++++++-- + 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 1c3790130a..4cfd4c3547 100644 +index 7a0fc80f2f..7a7c41aeb2 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1897,8 +1897,17 @@ static void do_setupmapping(fuse_req_t req, fuse_ino_t nodeid, +@@ -1905,8 +1905,17 @@ static void do_setupmapping(fuse_req_t req, fuse_ino_t nodeid, } if (req->se->op.setupmapping) { @@ -39,66 +45,73 @@ index 1c3790130a..4cfd4c3547 100644 fuse_reply_err(req, ENOSYS); } diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 056b395574..ebd5a9b215 100644 +index 63ec2e5030..6eedf98c81 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c -@@ -2895,11 +2895,19 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, +@@ -3345,8 +3345,61 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, uint64_t len, uint64_t moffset, uint64_t flags, struct fuse_file_info *fi) { -- int ret = 0; +- /* TODO */ +- fuse_reply_err(req, ENOSYS); + struct lo_data *lo = lo_data(req); -+ int ret = 0, fd, res; - VhostUserFSSlaveMsg msg = { 0 }; - uint64_t vhu_flags; ++ int ret = 0, fd; ++ VhostUserFSSlaveMsg *msg = g_malloc0(sizeof(VhostUserFSSlaveMsg) + ++ sizeof(VhostUserFSSlaveMsgEntry)); ++ uint64_t vhu_flags; + char *buf; - bool writable = flags & O_RDWR; - ++ bool writable = flags & O_RDWR; ++ + fuse_log(FUSE_LOG_DEBUG, + "lo_setupmapping(ino=%" PRIu64 ", fi=0x%p," + " foffset=%" PRIu64 ", len=%" PRIu64 ", moffset=%" PRIu64 + ", flags=%" PRIu64 ")\n", + ino, (void *)fi, foffset, len, moffset, flags); + - vhu_flags = VHOST_USER_FS_FLAG_MAP_R; - if (writable) { - vhu_flags |= VHOST_USER_FS_FLAG_MAP_W; -@@ -2910,12 +2918,32 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, - msg.c_offset[0] = moffset; - msg.flags[0] = vhu_flags; - -- if (fuse_virtio_map(req, &msg, lo_fi_fd(req, fi))) { -- fprintf(stderr, "%s: map over virtio failed (fd=%d)\n", __func__, -- (int)fi->fh); ++ vhu_flags = VHOST_USER_FS_FLAG_MAP_R; ++ if (writable) { ++ vhu_flags |= VHOST_USER_FS_FLAG_MAP_W; ++ } ++ ++ msg->hdr.count = 1; ++ msg->entries[0].fd_offset = foffset; ++ msg->entries[0].len = len; ++ msg->entries[0].c_offset = moffset; ++ msg->entries[0].flags = vhu_flags; ++ + if (fi) { + fd = lo_fi_fd(req, fi); + } else { -+ res = asprintf(&buf, "%i", lo_fd(req, ino)); -+ if (res == -1) { ++ ret = asprintf(&buf, "%i", lo_fd(req, ino)); ++ if (ret == -1) { ++ g_free(msg); + return (void)fuse_reply_err(req, errno); + } + + fd = openat(lo->proc_self_fd, buf, flags); + free(buf); + if (fd == -1) { ++ g_free(msg); + return (void)fuse_reply_err(req, errno); + } + } + -+ if (fuse_virtio_map(req, &msg, fd)) { ++ ret = fuse_virtio_map(req, msg, fd); ++ if (ret < 0) { + fuse_log(FUSE_LOG_ERR, + "%s: map over virtio failed (ino=%" PRId64 -+ "fd=%d moffset=0x%" PRIx64 ")\n", -+ __func__, ino, fi ? (int)fi->fh : lo_fd(req, ino), moffset); - ret = EINVAL; - } - ++ "fd=%d moffset=0x%" PRIx64 "). err = %d\n", ++ __func__, ino, fd, moffset, ret); ++ } ++ + if (!fi) { + close(fd); + } - fuse_reply_err(req, ret); ++ fuse_reply_err(req, -ret); ++ g_free(msg); } + static void lo_removemapping(fuse_req_t req, struct fuse_session *se, -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0013-DAX-virtiofsd-Make-lo_removemapping-work.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0013-DAX-virtiofsd-Make-lo_removemapping-work.patch new file mode 100644 index 0000000000..5563a750e3 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0013-DAX-virtiofsd-Make-lo_removemapping-work.patch @@ -0,0 +1,64 @@ +From c181c2895e52afacb1b13a9c9868b6a161de7cf3 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Mon, 13 Aug 2018 11:52:43 -0400 +Subject: [PATCH 13/25] DAX: virtiofsd: Make lo_removemapping() work + +Let guest pass in the offset in dax window a mapping is currently +mapped at and needs to be removed. + +Vivek added the initial support to remove single mapping and later Peng +added patch to support removing multiple mappings in single command. + +Signed-off-by: Vivek Goyal +Signed-off-by: Peng Tao +Reviewed-by: Stefan Hajnoczi +--- + tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6eedf98c81..c0cb0e6286 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -3406,8 +3406,36 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se, + fuse_ino_t ino, unsigned num, + struct fuse_removemapping_one *argp) + { +- /* TODO */ +- fuse_reply_err(req, ENOSYS); ++ VhostUserFSSlaveMsg *msg; ++ size_t alloc_count = (num > VHOST_USER_FS_SLAVE_MAX_ENTRIES) ? ++ VHOST_USER_FS_SLAVE_MAX_ENTRIES : num; ++ int ret = 0; ++ msg = g_malloc0(sizeof(VhostUserFSSlaveMsg) + ++ alloc_count * sizeof(VhostUserFSSlaveMsgEntry)); ++ ++ for (int i = 0, o = 0; num > 0; i++, argp++) { ++ VhostUserFSSlaveMsgEntry *e = &msg->entries[o]; ++ ++ e->len = argp->len; ++ e->c_offset = argp->moffset; ++ ++ o++; ++ if (--num == 0 || o == VHOST_USER_FS_SLAVE_MAX_ENTRIES) { ++ msg->hdr.count = o; ++ ret = fuse_virtio_unmap(se, msg); ++ if (ret < 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: unmap over virtio failed " ++ "(offset=0x%" PRIx64 ", len=0x%" PRIx64 "). err=%d\n", ++ __func__, argp->moffset, argp->len, ret); ++ break; ++ } ++ o = 0; ++ } ++ } ++ ++ fuse_reply_err(req, -ret); ++ g_free(msg); + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch similarity index 78% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch index 1e2aa5c58f..39d0ead824 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch @@ -1,12 +1,13 @@ -From 7c14a24ad467b9404b95345c64e8c5ef5e6d209c Mon Sep 17 00:00:00 2001 +From ebde2d4244180811b1c53d483b843cd88bcf69fd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 30 Nov 2018 11:47:36 +0000 -Subject: [PATCH 14/29] DAX: virtiofsd: route se down to destroy method +Subject: [PATCH 14/25] DAX: virtiofsd: route se down to destroy method We're going to need to pass the session down to destroy so that it can pass it back to do the remove mapping. Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- tools/virtiofsd/fuse_lowlevel.c | 6 +++--- tools/virtiofsd/fuse_lowlevel.h | 2 +- @@ -14,10 +15,10 @@ Signed-off-by: Dr. David Alan Gilbert 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 4cfd4c3547..a2480d4aa1 100644 +index 7a7c41aeb2..82ebd29b22 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2211,7 +2211,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, +@@ -2235,7 +2235,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, se->got_destroy = 1; se->got_init = 0; if (se->op.destroy) { @@ -26,7 +27,7 @@ index 4cfd4c3547..a2480d4aa1 100644 } send_reply_ok(req, NULL, 0); -@@ -2438,7 +2438,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, +@@ -2462,7 +2462,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, se->got_destroy = 1; se->got_init = 0; if (se->op.destroy) { @@ -35,7 +36,7 @@ index 4cfd4c3547..a2480d4aa1 100644 } } else { goto reply_err; -@@ -2526,7 +2526,7 @@ void fuse_session_destroy(struct fuse_session *se) +@@ -2551,7 +2551,7 @@ void fuse_session_destroy(struct fuse_session *se) { if (se->got_init && !se->got_destroy) { if (se->op.destroy) { @@ -45,10 +46,10 @@ index 4cfd4c3547..a2480d4aa1 100644 } pthread_rwlock_destroy(&se->init_rwlock); diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 2851840cc2..2259623776 100644 +index f628bbe475..1cb3b8a31a 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -208,7 +208,7 @@ struct fuse_lowlevel_ops { +@@ -209,7 +209,7 @@ struct fuse_lowlevel_ops { * * @param userdata the user data passed to fuse_session_new() */ @@ -58,10 +59,10 @@ index 2851840cc2..2259623776 100644 /** * Look up a directory entry by name and get its attributes. diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ebd5a9b215..0d3cda8d2f 100644 +index c0cb0e6286..e17733b1ef 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c -@@ -2871,7 +2871,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +@@ -3321,7 +3321,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, } } @@ -71,5 +72,5 @@ index ebd5a9b215..0d3cda8d2f 100644 struct lo_data *lo = (struct lo_data *)userdata; -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch new file mode 100644 index 0000000000..d66d4c796e --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch @@ -0,0 +1,41 @@ +From b017dcb2b5531281d23831332298f3b6fbe61ddc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 30 Nov 2018 11:50:25 +0000 +Subject: [PATCH 15/25] DAX: virtiofsd: Perform an unmap on destroy + +Force unmap all remaining dax cache entries on a destroy. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e17733b1ef..20c5ea897b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -3325,6 +3325,20 @@ static void lo_destroy(void *userdata, struct fuse_session *se) + { + struct lo_data *lo = (struct lo_data *)userdata; + ++ if (fuse_lowlevel_is_virtio(se)) { ++ VhostUserFSSlaveMsg *msg = g_malloc0(sizeof(VhostUserFSSlaveMsg) + ++ sizeof(VhostUserFSSlaveMsgEntry)); ++ ++ msg->hdr.count = 0; ++ msg->entries[0].len = ~(uint64_t)0; /* Special: means 'all' */ ++ msg->entries[0].c_offset = 0; ++ if (fuse_virtio_unmap(se, msg)) { ++ fuse_log(FUSE_LOG_ERR, "%s: unmap during destroy failed\n", ++ __func__); ++ } ++ g_free(msg); ++ } ++ + pthread_mutex_lock(&lo->mutex); + while (true) { + GHashTableIter iter; +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0016-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0016-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch new file mode 100644 index 0000000000..c6ce5a94d4 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0016-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch @@ -0,0 +1,245 @@ +From e4fee76ba535124cf9fbf2058c87875255397868 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 11:54:02 +0100 +Subject: [PATCH 16/25] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO + +Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a +client to ask qemu to perform a read/write from an fd directly +to GPA. + +Signed-off-by: Dr. David Alan Gilbert +--- + docs/interop/vhost-user.rst | 16 ++++ + hw/virtio/trace-events | 6 ++ + hw/virtio/vhost-user-fs.c | 104 ++++++++++++++++++++++ + hw/virtio/vhost-user.c | 12 ++- + include/hw/virtio/vhost-user-fs.h | 2 + + subprojects/libvhost-user/libvhost-user.h | 1 + + 6 files changed, 139 insertions(+), 2 deletions(-) + +diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst +index 3e0dfe50d0..1b3e0662a5 100644 +--- a/docs/interop/vhost-user.rst ++++ b/docs/interop/vhost-user.rst +@@ -1457,6 +1457,22 @@ Slave message types + multiple chunks can be unmapped in one command. + A reply is generated indicating whether unmapping succeeded. + ++``VHOST_USER_SLAVE_FS_IO`` ++ :id: 8 ++ :equivalent ioctl: N/A ++ :slave payload: ``struct VhostUserFSSlaveMsg`` ++ :master payload: N/A ++ ++ Requests that IO be performed directly from an fd, passed in ancillary ++ data, to guest memory on behalf of the daemon; this is normally for a ++ case where a memory region isn't visible to the daemon. slave payload ++ has flags which determine the direction of IO operation. ++ ++ The ``VHOST_USER_FS_FLAG_MAP_R`` flag must be set in the ``flags`` field to ++ read from the file into RAM. ++ The ``VHOST_USER_FS_FLAG_MAP_W`` flag must be set in the ``flags`` field to ++ write to the file from RAM. ++ + .. _reply_ack: + + VHOST_USER_PROTOCOL_F_REPLY_ACK +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 8ed19e9d0c..5ddd1b3659 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_set_owner(void *dev) "dev: %p" + vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 + ++# vhost-user-fs.c ++ ++vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd" ++vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd" ++vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd" ++ + # virtio.c + virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" + virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index 60ae21b7bc..2b022fc676 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -23,6 +23,8 @@ + #include "hw/virtio/vhost-user-fs.h" + #include "monitor/monitor.h" + #include "sysemu/sysemu.h" ++#include "exec/address-spaces.h" ++#include "trace.h" + + static const int user_feature_bits[] = { + VIRTIO_F_VERSION_1, +@@ -220,6 +222,108 @@ uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, size_t message_size, + return (uint64_t)res; + } + ++uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, size_t message_size, ++ VhostUserFSSlaveMsg *sm, int fd) ++{ ++ VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev), ++ TYPE_VHOST_USER_FS); ++ if (!fs) { ++ error_report("%s: Bad fs ptr", __func__); ++ return (uint64_t)-1; ++ } ++ if (!check_slave_message_entries(sm, message_size)) { ++ return (uint64_t)-1; ++ } ++ ++ unsigned int i; ++ int res = 0; ++ size_t done = 0; ++ ++ if (fd < 0) { ++ error_report("Bad fd for io"); ++ return (uint64_t)-1; ++ } ++ ++ for (i = 0; i < sm->hdr.count && !res; i++) { ++ VhostUserFSSlaveMsgEntry *e = &sm->entries[i]; ++ if (e->len == 0) { ++ continue; ++ } ++ ++ size_t len = e->len; ++ uint64_t fd_offset = e->fd_offset; ++ hwaddr gpa = e->c_offset; ++ ++ while (len && !res) { ++ hwaddr xlat, xlat_len; ++ bool is_write = e->flags & VHOST_USER_FS_FLAG_MAP_W; ++ MemoryRegion *mr = address_space_translate(dev->vdev->dma_as, gpa, ++ &xlat, &xlat_len, ++ is_write, ++ MEMTXATTRS_UNSPECIFIED); ++ if (!mr || !xlat_len) { ++ error_report("No guest region found for 0x%" HWADDR_PRIx, gpa); ++ res = -EFAULT; ++ break; ++ } ++ ++ trace_vhost_user_fs_slave_io_loop(mr->name, ++ (uint64_t)xlat, ++ memory_region_is_ram(mr), ++ memory_region_is_romd(mr), ++ (size_t)xlat_len); ++ if (xlat_len > len) { ++ xlat_len = len; ++ } ++ ++ if (!mr->ram_block) { ++ error_report("No guest region found for 0x%" HWADDR_PRIx, gpa); ++ res = -EFAULT; ++ break; ++ } ++ ++ void *hostptr = qemu_map_ram_ptr(mr->ram_block, ++ xlat); ++ ssize_t transferred; ++ if (e->flags & VHOST_USER_FS_FLAG_MAP_R) { ++ /* Read from file into RAM */ ++ if (mr->readonly) { ++ res = -EFAULT; ++ break; ++ } ++ transferred = pread(fd, hostptr, xlat_len, fd_offset); ++ } else if (e->flags & VHOST_USER_FS_FLAG_MAP_W) { ++ /* Write into file from RAM */ ++ transferred = pwrite(fd, hostptr, xlat_len, fd_offset); ++ } else { ++ res = -EINVAL; ++ break; ++ } ++ ++ trace_vhost_user_fs_slave_io_loop_res(transferred); ++ if (transferred < 0) { ++ res = -errno; ++ break; ++ } ++ if (!transferred) { ++ /* EOF */ ++ break; ++ } ++ ++ done += transferred; ++ fd_offset += transferred; ++ gpa += transferred; ++ len -= transferred; ++ } ++ } ++ ++ trace_vhost_user_fs_slave_io_exit(res, done); ++ if (res < 0) { ++ return (uint64_t)res; ++ } ++ return (uint64_t)done; ++} ++ + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 60f769b1b7..470aedc6d2 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_VRING_ERR = 5, + VHOST_USER_SLAVE_FS_MAP = 6, + VHOST_USER_SLAVE_FS_UNMAP = 7, ++ VHOST_USER_SLAVE_FS_IO = 8, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +@@ -1550,11 +1551,18 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, + break; + #ifdef CONFIG_VHOST_USER_FS + case VHOST_USER_SLAVE_FS_MAP: +- ret = vhost_user_fs_slave_map(dev, hdr.size, &payload.fs, ++ ret = vhost_user_fs_slave_map(dev, hdr.size, ++ (VhostUserFSSlaveMsg *)&payload.fs_max, + fd ? fd[0] : -1); + break; + case VHOST_USER_SLAVE_FS_UNMAP: +- ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs); ++ ret = vhost_user_fs_slave_unmap(dev, hdr.size, ++ (VhostUserFSSlaveMsg *)&payload.fs_max); ++ break; ++ case VHOST_USER_SLAVE_FS_IO: ++ ret = vhost_user_fs_slave_io(dev, hdr.size, ++ (VhostUserFSSlaveMsg *)&payload.fs_max, ++ fd ? fd[0] : -1); + break; + #endif + default: +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 1edb328c40..ac15accfd9 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -94,5 +94,7 @@ uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, size_t message_size, + VhostUserFSSlaveMsg *sm, int fd); + uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, size_t message_size, + VhostUserFSSlaveMsg *sm); ++uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, size_t message_size, ++ VhostUserFSSlaveMsg *sm, int fd); + + #endif /* _QEMU_VHOST_USER_FS_H */ +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index 8b50918041..7e372cf874 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -121,6 +121,7 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_VRING_ERR = 5, + VHOST_USER_SLAVE_FS_MAP = 6, + VHOST_USER_SLAVE_FS_UNMAP = 7, ++ VHOST_USER_SLAVE_FS_IO = 8, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0018-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0017-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch similarity index 51% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0018-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0017-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch index 23b31b1791..e323d2b6fe 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0018-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0017-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch @@ -1,7 +1,7 @@ -From 5e3aff71f01f41254cdc7ecefc98a31be002dda0 Mon Sep 17 00:00:00 2001 +From aa1f765d1240ce410b0679f9bf0a1e1de0f3ade3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 20 May 2019 12:17:36 +0100 -Subject: [PATCH 18/29] DAX/unmap virtiofsd: Add wrappers for +Subject: [PATCH 17/25] DAX/unmap virtiofsd: Add wrappers for VHOST_USER_SLAVE_FS_IO Add a wrapper to send VHOST_USER_SLAVE_FS_IO commands and a @@ -9,18 +9,19 @@ further wrapper for sending a fuse_buf write using the FS_IO slave command. Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- - tools/virtiofsd/fuse_lowlevel.h | 24 +++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.c | 38 +++++++++++++++++++++++++++++++++ - 2 files changed, 62 insertions(+) + tools/virtiofsd/fuse_lowlevel.h | 25 +++++++++++++++++++ + tools/virtiofsd/fuse_virtio.c | 43 +++++++++++++++++++++++++++++++++ + 2 files changed, 68 insertions(+) diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 2259623776..866d122352 100644 +index 1cb3b8a31a..50bc3d5ffd 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -2022,4 +2022,28 @@ int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); +@@ -2014,4 +2014,29 @@ int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); */ - int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); + int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); +/** + * For use with virtio-fs; request IO directly to memory @@ -28,9 +29,10 @@ index 2259623776..866d122352 100644 + * @param se The current session + * @param msg A set of IO requests + * @param fd The fd to map -+ * @return Zero on success ++ * @return Length on success, negative errno on error + */ -+int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd); ++int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, ++ int fd); + +/** + * For use with virtio-fs; wrapper for fuse_virtio_io for writes @@ -48,21 +50,22 @@ index 2259623776..866d122352 100644 + #endif /* FUSE_LOWLEVEL_H_ */ diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 24d9323665..abac0d0d2e 100644 +index 89e923dd26..a61be1512e 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c -@@ -1071,3 +1071,41 @@ int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) - return !vu_fs_cache_request(&req->se->virtio_dev->dev, - VHOST_USER_SLAVE_FS_SYNC, -1, msg); +@@ -1099,3 +1099,46 @@ int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) + return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, + -1, msg); } + -+int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd) ++int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, ++ int fd) +{ + if (!se->virtio_dev) { + return -ENODEV; + } -+ return !vu_fs_cache_request(&se->virtio_dev->dev, -+ VHOST_USER_SLAVE_FS_IO, fd, msg); ++ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_IO, ++ fd, msg); +} + +/* @@ -73,26 +76,30 @@ index 24d9323665..abac0d0d2e 100644 + size_t dst_off, const struct fuse_buf *src, + size_t src_off, size_t len) +{ -+ VhostUserFSSlaveMsg msg = { 0 }; ++ VhostUserFSSlaveMsg *msg = g_malloc0(sizeof(VhostUserFSSlaveMsg) + ++ sizeof(VhostUserFSSlaveMsgEntry)); ++ ++ msg->hdr.count = 1; + + if (dst->flags & FUSE_BUF_FD_SEEK) { -+ msg.fd_offset[0] = dst->pos + dst_off; ++ msg->entries[0].fd_offset = dst->pos + dst_off; + } else { + off_t cur = lseek(dst->fd, 0, SEEK_CUR); + if (cur == (off_t)-1) { ++ g_free(msg); + return -errno; + } -+ msg.fd_offset[0] = cur; ++ msg->entries[0].fd_offset = cur; + } -+ msg.c_offset[0] = (uintptr_t)src->mem + src_off; -+ msg.len[0] = len; -+ msg.flags[0] = VHOST_USER_FS_FLAG_MAP_W; ++ msg->entries[0].c_offset = (uintptr_t)src->mem + src_off; ++ msg->entries[0].len = len; ++ msg->entries[0].flags = VHOST_USER_FS_FLAG_MAP_W; + -+ bool result = !fuse_virtio_io(req->se, &msg, dst->fd); -+ /* TODO: Rework the result path to actually get length/error */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: result=%d\n", __func__, result); -+ return result ? len : -EIO; ++ int64_t result = fuse_virtio_io(req->se, msg, dst->fd); ++ fuse_log(FUSE_LOG_DEBUG, "%s: result=%" PRId64 "\n", __func__, result); ++ g_free(msg); ++ return result; +} -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0019-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0018-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch similarity index 63% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0019-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0018-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch index 5649391a30..98be0ef30a 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0019-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0018-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch @@ -1,11 +1,11 @@ -From 1586d4a5525f44c51cbcbd5004b9a79bfc8c495c Mon Sep 17 00:00:00 2001 +From e0c47d08b07c9e2c4c9e235ce2fb86594dde24af Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 20 May 2019 13:26:09 +0100 -Subject: [PATCH 19/29] DAX/unmap virtiofsd: Parse unmappable elements +Subject: [PATCH 18/25] DAX/unmap virtiofsd: Parse unmappable elements For some read/writes the virtio queue elements are unmappable by the daemon; these are cases where the data is to be read/written -from non-RAM. In viritofs's case this is typically a direct read/write +from non-RAM. In virtiofs's case this is typically a direct read/write into an mmap'd DAX file also on virtiofs (possibly on another instance). When we receive a virtio queue element, check that we have enough @@ -17,14 +17,15 @@ FUSE_BUF_PHYS_ADDR. Signed-off-by: Dr. David Alan Gilbert with fix by: Signed-off-by: Liu Bo +Reviewed-by: Stefan Hajnoczi --- tools/virtiofsd/buffer.c | 4 +- - tools/virtiofsd/fuse_common.h | 7 ++ - tools/virtiofsd/fuse_virtio.c | 191 ++++++++++++++++++++++++---------- - 3 files changed, 145 insertions(+), 57 deletions(-) + tools/virtiofsd/fuse_common.h | 8 ++ + tools/virtiofsd/fuse_virtio.c | 230 ++++++++++++++++++++++++---------- + 3 files changed, 174 insertions(+), 68 deletions(-) diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 874f01c488..1a050aa441 100644 +index b5f04be356..01a84144ec 100644 --- a/tools/virtiofsd/buffer.c +++ b/tools/virtiofsd/buffer.c @@ -77,6 +77,7 @@ static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, @@ -46,17 +47,18 @@ index 874f01c488..1a050aa441 100644 } } diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index a090040bb2..ed9280de91 100644 +index 0c2665b977..18f68bcbd3 100644 --- a/tools/virtiofsd/fuse_common.h +++ b/tools/virtiofsd/fuse_common.h -@@ -611,6 +611,13 @@ enum fuse_buf_flags { +@@ -631,6 +631,14 @@ enum fuse_buf_flags { * detected. */ FUSE_BUF_FD_RETRY = (1 << 3), + + /** + * The addresses in the iovec represent guest physical addresses -+ * that can't be mapped by the daemon process. ++ * (or IOVA when used with an IOMMU) * that can't be mapped by the ++ * daemon process. + * IO must be bounced back to the VMM to do it. + */ + FUSE_BUF_PHYS_ADDR = (1 << 4), @@ -64,7 +66,7 @@ index a090040bb2..ed9280de91 100644 /** diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index abac0d0d2e..31f17ab043 100644 +index a61be1512e..b858c723ed 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -49,6 +49,10 @@ typedef struct { @@ -78,7 +80,7 @@ index abac0d0d2e..31f17ab043 100644 /* Used to complete requests that involve no reply */ bool reply_sent; } FVRequest; -@@ -291,8 +295,10 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +@@ -354,8 +358,10 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, /* The 'in' part of the elem is to qemu */ unsigned int in_num = elem->in_num; @@ -89,7 +91,7 @@ index abac0d0d2e..31f17ab043 100644 fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", __func__, elem->index, in_num, in_len); -@@ -300,7 +306,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +@@ -363,7 +369,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, * The elem should have room for a 'fuse_out_header' (out from fuse) * plus the data based on the len in the header. */ @@ -97,17 +99,17 @@ index abac0d0d2e..31f17ab043 100644 + if (in_len_writeable < sizeof(struct fuse_out_header)) { fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", __func__, elem->index); - ret = E2BIG; -@@ -327,7 +333,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + return E2BIG; +@@ -387,7 +393,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); /* These get updated as we skip */ struct iovec *in_sg_ptr = in_sg_cpy; -- int in_sg_cpy_count = in_num; -+ int in_sg_cpy_count = in_num - bad_in_num; +- unsigned int in_sg_cpy_count = in_num; ++ unsigned int in_sg_cpy_count = in_num - bad_in_num; /* skip over parts of in_sg that contained the header iov */ - size_t skip_size = iov_len; -@@ -460,17 +466,21 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, iov_len); +@@ -488,17 +494,21 @@ static void fv_queue_worker(gpointer data, gpointer user_data) /* The 'out' part of the elem is from qemu */ unsigned int out_num = elem->out_num; @@ -132,14 +134,19 @@ index abac0d0d2e..31f17ab043 100644 fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", __func__, elem->index); assert(0); /* TODO */ -@@ -484,63 +494,129 @@ static void fv_queue_worker(gpointer data, gpointer user_data) - copy_from_iov(&fbuf, 1, out_sg); +@@ -509,80 +519,163 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + assert(0); /* TODO */ + } + /* Copy just the fuse_in_header and look at it */ +- copy_from_iov(&fbuf, out_num, out_sg, ++ copy_from_iov(&fbuf, out_num_readable, out_sg, + sizeof(struct fuse_in_header)); + memcpy(&inh, fbuf.mem, sizeof(struct fuse_in_header)); pbufv = NULL; /* Compiler thinks an unitialised path */ -- if (out_num > 2 && -- out_sg[0].iov_len == sizeof(struct fuse_in_header) && -- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { +- if (inh.opcode == FUSE_WRITE && +- out_len >= (sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in))) { - /* - * For a write we don't actually need to copy the - * data, we can just do it straight out of guest memory @@ -148,15 +155,15 @@ index abac0d0d2e..31f17ab043 100644 - */ - fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); - -- /* copy the fuse_write_in header afte rthe fuse_in_header */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; +- fbuf.size = copy_from_iov(&fbuf, out_num, out_sg, +- sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in)); +- /* That copy reread the in_header, make sure we use the original */ +- memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header)); - - /* Allocate the bufv, with space for the rest of the iov */ -- pbufv = malloc(sizeof(struct fuse_bufvec) + -- sizeof(struct fuse_buf) * (out_num - 2)); +- pbufv = g_try_malloc(sizeof(struct fuse_bufvec) + +- sizeof(struct fuse_buf) * out_num); - if (!pbufv) { - fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", - __func__); @@ -165,21 +172,22 @@ index abac0d0d2e..31f17ab043 100644 + if (req->bad_in_num || req->bad_out_num) { + bool handled_unmappable = false; + -+ if (out_num > 2 && out_num_readable >= 2 && !req->bad_in_num && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ if (!req->bad_in_num && ++ inh.opcode == FUSE_WRITE && ++ out_len_readable >= (sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in))) { + handled_unmappable = true; + + /* copy the fuse_write_in header after fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ fbuf.size = copy_from_iov(&fbuf, out_num_readable, out_sg, ++ sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in)); ++ /* That copy reread the in_header, make sure we use the original */ ++ memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header)); + + /* Allocate the bufv, with space for the rest of the iov */ -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); ++ pbufv = g_try_malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * out_num); + if (!pbufv) { + fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", + __func__); @@ -192,11 +200,40 @@ index abac0d0d2e..31f17ab043 100644 + allocated_bufv = true; + pbufv->count = 1; + pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ + +- size_t iovindex, pbufvindex, iov_bytes_skip; +- pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ size_t iovindex, pbufvindex, iov_bytes_skip; + pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ + +- if (!skip_iov(out_sg, out_num, +- sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in), +- &iovindex, &iov_bytes_skip)) { +- fuse_log(FUSE_LOG_ERR, "%s: skip failed\n", +- __func__); +- goto out; +- } ++ if (!skip_iov(out_sg, out_num, ++ sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in), ++ &iovindex, &iov_bytes_skip)) { ++ fuse_log(FUSE_LOG_ERR, "%s: skip failed\n", ++ __func__); ++ goto out; ++ } + +- for (; iovindex < out_num; iovindex++, pbufvindex++) { +- pbufv->count++; +- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ +- pbufv->buf[pbufvindex].flags = 0; +- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; +- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; +- +- if (iov_bytes_skip) { +- pbufv->buf[pbufvindex].mem += iov_bytes_skip; +- pbufv->buf[pbufvindex].size -= iov_bytes_skip; +- iov_bytes_skip = 0; + for (; iovindex < out_num; iovindex++, pbufvindex++) { + pbufv->count++; + pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ @@ -205,45 +242,45 @@ index abac0d0d2e..31f17ab043 100644 + FUSE_BUF_PHYS_ADDR; + pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; + pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } ++ ++ if (iov_bytes_skip) { ++ pbufv->buf[pbufvindex].mem += iov_bytes_skip; ++ pbufv->buf[pbufvindex].size -= iov_bytes_skip; ++ iov_bytes_skip = 0; ++ } + } + } +- } else { +- /* Normal (non fast write) path */ -- size_t iovindex, pbufvindex; -- iovindex = 2; /* 2 headers, separate iovs */ -- pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ if (out_num == 2 && out_num_readable == 2 && req->bad_in_num && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_READ && -+ out_sg[1].iov_len == sizeof(struct fuse_read_in)) { +- copy_from_iov(&fbuf, out_num, out_sg, se->bufsize); +- /* That copy reread the in_header, make sure we use the original */ +- memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header)); +- fbuf.size = out_len; ++ if (req->bad_in_num && ++ inh.opcode == FUSE_READ && ++ out_len_readable >= ++ (sizeof(struct fuse_in_header) + sizeof(struct fuse_read_in))) { + fuse_log(FUSE_LOG_DEBUG, + "Unmappable read case " + "in_num=%d bad_in_num=%d\n", + elem->in_num, req->bad_in_num); + handled_unmappable = true; + } - -- for (; iovindex < out_num; iovindex++, pbufvindex++) { -- pbufv->count++; -- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -- pbufv->buf[pbufvindex].flags = 0; -- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ + if (!handled_unmappable) { + fuse_log(FUSE_LOG_ERR, + "Unhandled unmappable element: out: %d(b:%d) in: " + "%d(b:%d)", + out_num, req->bad_out_num, elem->in_num, req->bad_in_num); + fv_panic(dev, "Unhandled unmappable element"); - } -- } else { -- /* Normal (non fast write) path */ ++ } + } + + if (!req->bad_out_num) { -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ if (inh.opcode == FUSE_WRITE && ++ out_len_readable >= (sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in))) { + /* + * For a write we don't actually need to copy the + * data, we can just do it straight out of guest memory @@ -253,53 +290,63 @@ index abac0d0d2e..31f17ab043 100644 + fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", + __func__); + -+ /* copy the fuse_write_in header after fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ fbuf.size = copy_from_iov(&fbuf, out_num, out_sg, ++ sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in)); ++ /* That copy reread the in_header, make sure we use the original */ ++ memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header)); + + /* Allocate the bufv, with space for the rest of the iov */ + pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); ++ sizeof(struct fuse_buf) * out_num); + if (!pbufv) { + fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", + __func__); + goto out; + } - -- /* Copy the rest of the buffer */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_len; ++ + allocated_bufv = true; + pbufv->count = 1; + pbufv->buf[0] = fbuf; - /* TODO! Endianness of header */ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ ++ size_t iovindex, pbufvindex, iov_bytes_skip; + pbufvindex = 1; /* 2 headers, 1 fusebuf */ - /* TODO: Add checks for fuse_session_exited */ - bufv.buf[0] = fbuf; - bufv.count = 1; - pbufv = &bufv; ++ if (!skip_iov(out_sg, out_num, ++ sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in), ++ &iovindex, &iov_bytes_skip)) { ++ fuse_log(FUSE_LOG_ERR, "%s: skip failed\n", ++ __func__); ++ goto out; ++ } ++ + for (; iovindex < out_num; iovindex++, pbufvindex++) { + pbufv->count++; + pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ + pbufv->buf[pbufvindex].flags = 0; + pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; + pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ ++ if (iov_bytes_skip) { ++ pbufv->buf[pbufvindex].mem += iov_bytes_skip; ++ pbufv->buf[pbufvindex].size -= iov_bytes_skip; ++ iov_bytes_skip = 0; ++ } + } + } else { + /* Normal (non fast write) path */ + + /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num, out_sg, se->bufsize); ++ /* That copy reread the in_header, make sure we use the original */ ++ memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header)); ++ + fbuf.size = out_len; + + /* TODO! Endianness of header */ @@ -312,7 +359,7 @@ index abac0d0d2e..31f17ab043 100644 } pbufv->idx = 0; pbufv->off = 0; -@@ -657,13 +733,16 @@ static void *fv_queue_thread(void *opaque) +@@ -697,13 +790,16 @@ static void *fv_queue_thread(void *opaque) __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); while (1) { @@ -331,5 +378,5 @@ index abac0d0d2e..31f17ab043 100644 if (!se->thread_pool_size) { req_list = g_list_prepend(req_list, req); -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0019-DAX-unmap-virtiofsd-Route-unmappable-reads.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0019-DAX-unmap-virtiofsd-Route-unmappable-reads.patch new file mode 100644 index 0000000000..4c6c471f72 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0019-DAX-unmap-virtiofsd-Route-unmappable-reads.patch @@ -0,0 +1,63 @@ +From 0ec3e32695a7a5d9ee79bdc7fb878d779cde7c3e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 13:26:51 +0100 +Subject: [PATCH 19/25] DAX/unmap virtiofsd: Route unmappable reads + +When a read with unmappable buffers is found, map it to a slave +read command. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi +--- + tools/virtiofsd/fuse_virtio.c | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index b858c723ed..b0f9d06c9e 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -434,6 +434,41 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + } + } while (len); + ++ if (bad_in_num) { ++ /* TODO: Rework to send in fewer messages */ ++ VhostUserFSSlaveMsg *msg = g_malloc0(sizeof(VhostUserFSSlaveMsg) + ++ sizeof(VhostUserFSSlaveMsgEntry)); ++ while (len && bad_in_num) { ++ msg->hdr.count = 1; ++ msg->entries[0].flags = VHOST_USER_FS_FLAG_MAP_R; ++ msg->entries[0].fd_offset = buf->buf[0].pos; ++ msg->entries[0].c_offset = ++ (uint64_t)(uintptr_t)in_sg_ptr[0].iov_base; ++ msg->entries[0].len = in_sg_ptr[0].iov_len; ++ if (len < msg->entries[0].len) { ++ msg->entries[0].len = len; ++ } ++ int64_t req_res = fuse_virtio_io(se, msg, buf->buf[0].fd); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: bad loop; len=%zd bad_in_num=%d fd_offset=%jd " ++ "c_offset=%p req_res=%" PRId64 "\n", ++ __func__, len, bad_in_num, (intmax_t)(buf->buf[0].pos), ++ in_sg_ptr[0].iov_base, req_res); ++ if (req_res > 0) { ++ len -= msg->entries[0].len; ++ buf->buf[0].pos += msg->entries[0].len; ++ in_sg_ptr++; ++ bad_in_num--; ++ } else if (req_res == 0) { ++ break; ++ } else { ++ g_free(msg); ++ return req_res; ++ } ++ } ++ g_free(msg); ++ } ++ + /* Need to fix out->len on EOF */ + if (len) { + struct fuse_out_header *out_sg = in_sg[0].iov_base; +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0021-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0020-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch similarity index 88% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0021-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0020-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch index f8a73044b9..155a65c887 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0021-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0020-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch @@ -1,7 +1,7 @@ -From e291b7766f49b06933afed374b6476416d951517 Mon Sep 17 00:00:00 2001 +From a3df45f597e92b3d57d2ccfefef8de1eb356ade8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 20 May 2019 13:18:42 +0100 -Subject: [PATCH 21/29] DAX/unmap virtiofsd: route unmappable write to slave +Subject: [PATCH 20/25] DAX/unmap virtiofsd: route unmappable write to slave command When a fuse_buf_copy is performed on an element with FUSE_BUF_PHYS_ADDR @@ -9,6 +9,7 @@ route it to a fuse_virtio_write request that does a slave command to perform the write. Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Hajnoczi --- tools/virtiofsd/buffer.c | 14 +++++++++++--- tools/virtiofsd/fuse_common.h | 6 +++++- @@ -17,7 +18,7 @@ Signed-off-by: Dr. David Alan Gilbert 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 1a050aa441..8135d52d2a 100644 +index 01a84144ec..7dd416a64f 100644 --- a/tools/virtiofsd/buffer.c +++ b/tools/virtiofsd/buffer.c @@ -200,13 +200,20 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, @@ -62,10 +63,10 @@ index 1a050aa441..8135d52d2a 100644 if (!copied) { return res; diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index ed9280de91..05d56883dd 100644 +index 18f68bcbd3..46a0111cbd 100644 --- a/tools/virtiofsd/fuse_common.h +++ b/tools/virtiofsd/fuse_common.h -@@ -495,6 +495,8 @@ struct fuse_conn_info { +@@ -515,6 +515,8 @@ struct fuse_conn_info { struct fuse_session; struct fuse_pollhandle; struct fuse_conn_info_opts; @@ -74,7 +75,7 @@ index ed9280de91..05d56883dd 100644 /** * This function parses several command-line options that can be used -@@ -713,11 +715,13 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); +@@ -734,11 +736,13 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); /** * Copy data from one buffer vector to another * @@ -90,7 +91,7 @@ index ed9280de91..05d56883dd 100644 /** * Memory buffer iterator diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 866d122352..e543f64177 100644 +index 50bc3d5ffd..6c910d4cc0 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -42,9 +42,6 @@ @@ -104,10 +105,10 @@ index 866d122352..e543f64177 100644 * Session * diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 56a4b9404a..ab33fabcda 100644 +index 20c5ea897b..f614171c90 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c -@@ -2063,7 +2063,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +@@ -2384,7 +2384,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, } } @@ -117,5 +118,5 @@ index 56a4b9404a..ab33fabcda 100644 fuse_reply_err(req, -res); } else { -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0026-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0021-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch similarity index 61% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0026-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0021-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch index e39c64c83d..7eda80b374 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0026-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0021-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch @@ -1,7 +1,7 @@ -From e684fffcaf21baf0f4341091303ce3c2dcbf822d Mon Sep 17 00:00:00 2001 +From ece506307310970cc4c59fd598fe5d2afa303642 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 26 Jul 2019 09:33:22 +0100 -Subject: [PATCH 26/29] DAX:virtiofsd: implement FUSE_INIT map_alignment field +Subject: [PATCH 21/25] DAX:virtiofsd: implement FUSE_INIT map_alignment field Communicate the host page size to the FUSE client so that FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING requests are aware of our alignment @@ -9,14 +9,22 @@ constraints. Signed-off-by: Stefan Hajnoczi --- - tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ - 1 file changed, 7 insertions(+) + tools/virtiofsd/fuse_lowlevel.c | 8 ++++++++ + 1 file changed, 8 insertions(+) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 99ba000c2e..d6256f571b 100644 +index 82ebd29b22..6ae6fb3d78 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2188,6 +2188,12 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, +@@ -10,6 +10,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/host-utils.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" +@@ -2203,6 +2204,12 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, outarg.max_background = se->conn.max_background; outarg.congestion_threshold = se->conn.congestion_threshold; outarg.time_gran = se->conn.time_gran; @@ -24,12 +32,12 @@ index 99ba000c2e..d6256f571b 100644 + outarg.flags |= FUSE_MAP_ALIGNMENT; + + /* This constraint comes from mmap(2) and munmap(2) */ -+ outarg.map_alignment = ffsl(sysconf(_SC_PAGE_SIZE)) - 1; ++ outarg.map_alignment = ctz64(sysconf(_SC_PAGE_SIZE)); + } - fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); - fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -@@ -2197,6 +2203,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + if (se->conn.want & FUSE_CAP_HANDLE_KILLPRIV_V2) { + outarg.flags |= FUSE_HANDLE_KILLPRIV_V2; +@@ -2220,6 +2227,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", outarg.congestion_threshold); fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); @@ -38,5 +46,5 @@ index 99ba000c2e..d6256f571b 100644 send_reply_ok(req, &outarg, outargsize); } -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0022-vhost-user-fs-Extend-VhostUserFSSlaveMsg-to-pass-add.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0022-vhost-user-fs-Extend-VhostUserFSSlaveMsg-to-pass-add.patch new file mode 100644 index 0000000000..6c9f86d2b8 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0022-vhost-user-fs-Extend-VhostUserFSSlaveMsg-to-pass-add.patch @@ -0,0 +1,108 @@ +From 9e86ae7b22b299753785a2fc476c9ba8cf5f51ec Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Wed, 3 Feb 2021 11:56:55 -0500 +Subject: [PATCH 22/25] vhost-user-fs: Extend VhostUserFSSlaveMsg to pass + additional info + +Extend VhostUserFSSlaveMsg so that slave can ask it to drop CAP_FSETID +before doing I/O on fd. + +In some cases, virtiofsd takes the onus of clearing setuid bit on a file +when WRITE happens. Generally virtiofsd does the WRITE to fd (from guest +memory which is mapped in virtiofsd as well), but if this memory is +unmappable in virtiofsd (like cache window), then virtiofsd asks qemu +to do the I/O instead. + +To retain the capability to drop suid bit on write, qemu needs to +drop the CAP_FSETID as well before write to fd. Extend VhostUserFSSlaveMsg +so that virtiofsd can specify in message if CAP_FSETID needs to be +dropped. + +Signed-off-by: Vivek Goyal +--- + hw/virtio/vhost-user-fs.c | 5 +++++ + include/hw/virtio/vhost-user-fs.h | 10 +++++++--- + subprojects/libvhost-user/libvhost-user.h | 10 +++++++--- + 3 files changed, 19 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index 2b022fc676..ca4e6f8017 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -244,6 +244,11 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, size_t message_size, + return (uint64_t)-1; + } + ++ if (sm->hdr.flags & VHOST_USER_FS_GENFLAG_DROP_FSETID) { ++ error_report("Dropping CAP_FSETID is not supported"); ++ return (uint64_t)-ENOTSUP; ++ } ++ + for (i = 0; i < sm->hdr.count && !res; i++) { + VhostUserFSSlaveMsgEntry *e = &sm->entries[i]; + if (e->len == 0) { +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index ac15accfd9..cd4531d30c 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -30,6 +30,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS) + #define VHOST_USER_FS_FLAG_MAP_R (1u << 0) + #define VHOST_USER_FS_FLAG_MAP_W (1u << 1) + ++/* Generic flags for the overall message and not individual ranges */ ++/* Drop capability CAP_FSETID during the operation */ ++#define VHOST_USER_FS_GENFLAG_DROP_FSETID (1u << 0) ++ + typedef struct { + /* Offsets within the file being mapped */ + uint64_t fd_offset; +@@ -42,12 +46,12 @@ typedef struct { + } VhostUserFSSlaveMsgEntry; + + typedef struct { +- /* Spare */ +- uint32_t align32; ++ /* Generic flags for the overall message */ ++ uint32_t flags; + /* Number of entries */ + uint16_t count; + /* Spare */ +- uint16_t align16; ++ uint16_t align; + } VhostUserFSSlaveMsgHdr; + + /* +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index 7e372cf874..eee611a2f6 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -132,6 +132,10 @@ typedef enum VhostUserSlaveRequest { + #define VHOST_USER_FS_FLAG_MAP_R (1u << 0) + #define VHOST_USER_FS_FLAG_MAP_W (1u << 1) + ++/* Generic flags for the overall message and not individual ranges */ ++/* Drop capability CAP_FSETID during the operation */ ++#define VHOST_USER_FS_GENFLAG_DROP_FSETID (1u << 0) ++ + typedef struct { + /* Offsets within the file being mapped */ + uint64_t fd_offset; +@@ -144,12 +148,12 @@ typedef struct { + } VhostUserFSSlaveMsgEntry; + + typedef struct { +- /* Spare */ +- uint32_t align32; ++ /* Generic flags for the overall message */ ++ uint32_t flags; + /* Number of entries */ + uint16_t count; + /* Spare */ +- uint16_t align16; ++ uint16_t align; + } VhostUserFSSlaveMsgHdr; + + /* +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0023-vhost-user-fs-Implement-drop-CAP_FSETID-functionalit.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0023-vhost-user-fs-Implement-drop-CAP_FSETID-functionalit.patch new file mode 100644 index 0000000000..d160cd9844 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0023-vhost-user-fs-Implement-drop-CAP_FSETID-functionalit.patch @@ -0,0 +1,182 @@ +From e123501b6f69ef6dc7648dac06bd9001553e1f1f Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Wed, 3 Feb 2021 11:56:55 -0500 +Subject: [PATCH 23/25] vhost-user-fs: Implement drop CAP_FSETID functionality + +As part of slave_io message, slave can ask to do I/O on an fd. Additionally +slave can ask for dropping CAP_FSETID (if master has it) before doing I/O. +Implement functionality to drop CAP_FSETID and gain it back after the +operation. + +This also creates a dependency on libcap-ng. + +Signed-off-by: Vivek Goyal +--- + hw/virtio/meson.build | 1 + + hw/virtio/vhost-user-fs.c | 92 ++++++++++++++++++++++++++++++++++++++- + meson.build | 6 +++ + 3 files changed, 97 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index bc352a6009..33c4714a22 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -18,6 +18,7 @@ virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) + virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c')) + virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) ++virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: libcap_ng) + virtio_ss.add(when: ['CONFIG_VHOST_USER_FS', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-fs-pci.c')) + virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) + virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c', 'vhost-vsock-common.c')) +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index ca4e6f8017..f472d97f2c 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -13,6 +13,8 @@ + + #include "qemu/osdep.h" + #include ++#include ++#include + #include "standard-headers/linux/virtio_fs.h" + #include "qapi/error.h" + #include "hw/qdev-properties.h" +@@ -91,6 +93,84 @@ static bool check_slave_message_entries(const VhostUserFSSlaveMsg *sm, + return true; + } + ++/* ++ * Helpers for dropping and regaining effective capabilities. Returns 0 ++ * on success, error otherwise ++ */ ++static int drop_effective_cap(const char *cap_name, bool *cap_dropped) ++{ ++ int cap, ret; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = -errno; ++ error_report("capng_name_to_capability(%s) failed:%s", cap_name, ++ strerror(errno)); ++ goto out; ++ } ++ ++ if (capng_get_caps_process()) { ++ ret = -errno; ++ error_report("capng_get_caps_process() failed:%s", strerror(errno)); ++ goto out; ++ } ++ ++ /* We dont have this capability in effective set already. */ ++ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { ++ ret = 0; ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { ++ ret = -errno; ++ error_report("capng_update(DROP,) failed"); ++ goto out; ++ } ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = -errno; ++ error_report("drop:capng_apply() failed"); ++ goto out; ++ } ++ ++ ret = 0; ++ if (cap_dropped) { ++ *cap_dropped = true; ++ } ++ ++out: ++ return ret; ++} ++ ++static int gain_effective_cap(const char *cap_name) ++{ ++ int cap; ++ int ret = 0; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = -errno; ++ error_report("capng_name_to_capability(%s) failed:%s", cap_name, ++ strerror(errno)); ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { ++ ret = -errno; ++ error_report("capng_update(ADD,) failed"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = -errno; ++ error_report("gain:capng_apply() failed"); ++ goto out; ++ } ++ ret = 0; ++ ++out: ++ return ret; ++} ++ + uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, size_t message_size, + VhostUserFSSlaveMsg *sm, int fd) + { +@@ -238,6 +318,7 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, size_t message_size, + unsigned int i; + int res = 0; + size_t done = 0; ++ bool cap_fsetid_dropped = false; + + if (fd < 0) { + error_report("Bad fd for io"); +@@ -245,8 +326,10 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, size_t message_size, + } + + if (sm->hdr.flags & VHOST_USER_FS_GENFLAG_DROP_FSETID) { +- error_report("Dropping CAP_FSETID is not supported"); +- return (uint64_t)-ENOTSUP; ++ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); ++ if (res != 0) { ++ return (uint64_t)res; ++ } + } + + for (i = 0; i < sm->hdr.count && !res; i++) { +@@ -322,6 +405,11 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, size_t message_size, + } + } + ++ if (cap_fsetid_dropped) { ++ if (gain_effective_cap("FSETID")) { ++ error_report("Failed to gain CAP_FSETID"); ++ } ++ } + trace_vhost_user_fs_slave_io_exit(res, done); + if (res < 0) { + return (uint64_t)res; +diff --git a/meson.build b/meson.build +index b3e7ec0e92..725094fecc 100644 +--- a/meson.build ++++ b/meson.build +@@ -1222,6 +1222,12 @@ elif get_option('virtfs').disabled() + have_virtfs = false + endif + ++if config_host.has_key('CONFIG_VHOST_USER_FS') ++ if not libcap_ng.found() ++ error('vhost-user-fs requires libcap-ng-devel') ++ endif ++endif ++ + config_host_data.set_quoted('CONFIG_BINDIR', get_option('prefix') / get_option('bindir')) + config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix')) + config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir) +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0024-virtiofsd-Ask-qemu-to-drop-CAP_FSETID-if-client-aske.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0024-virtiofsd-Ask-qemu-to-drop-CAP_FSETID-if-client-aske.patch new file mode 100644 index 0000000000..d337676919 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0024-virtiofsd-Ask-qemu-to-drop-CAP_FSETID-if-client-aske.patch @@ -0,0 +1,140 @@ +From faaf8c804a7d8e5629049c4800892be60ba16224 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Wed, 3 Feb 2021 11:56:55 -0500 +Subject: [PATCH 24/25] virtiofsd: Ask qemu to drop CAP_FSETID if client asked + for it + +If qemu guest asked to drop CAP_FSETID upon write, send that info +to qemu in SLAVE_FS_IO message so that qemu can drop capability +before WRITE. This is to make sure that any setuid bit is killed +on fd (if there is one set). + +Signed-off-by: Vivek Goyal +--- + tools/virtiofsd/buffer.c | 10 ++++++---- + tools/virtiofsd/fuse_common.h | 6 +++++- + tools/virtiofsd/fuse_lowlevel.h | 6 +++++- + tools/virtiofsd/fuse_virtio.c | 5 ++++- + tools/virtiofsd/passthrough_ll.c | 2 +- + 5 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 7dd416a64f..87cc383713 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -203,7 +203,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + static ssize_t fuse_buf_copy_one(fuse_req_t req, + const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, +- size_t len) ++ size_t len, bool dropped_cap_fsetid) + { + int src_is_fd = src->flags & FUSE_BUF_IS_FD; + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +@@ -211,7 +211,8 @@ static ssize_t fuse_buf_copy_one(fuse_req_t req, + int dst_is_phys = src->flags & FUSE_BUF_PHYS_ADDR; + + if (src_is_phys && !src_is_fd && dst_is_fd) { +- return fuse_virtio_write(req, dst, dst_off, src, src_off, len); ++ return fuse_virtio_write(req, dst, dst_off, src, src_off, len, ++ dropped_cap_fsetid); + } + assert(!src_is_phys && !dst_is_phys); + if (!src_is_fd && !dst_is_fd) { +@@ -267,7 +268,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + } + + ssize_t fuse_buf_copy(fuse_req_t req, struct fuse_bufvec *dstv, +- struct fuse_bufvec *srcv) ++ struct fuse_bufvec *srcv, bool dropped_cap_fsetid) + { + size_t copied = 0, i; + +@@ -309,7 +310,8 @@ ssize_t fuse_buf_copy(fuse_req_t req, struct fuse_bufvec *dstv, + dst_len = dst->size - dstv->off; + len = min_size(src_len, dst_len); + +- res = fuse_buf_copy_one(req, dst, dstv->off, src, srcv->off, len); ++ res = fuse_buf_copy_one(req, dst, dstv->off, src, srcv->off, len, ++ dropped_cap_fsetid); + if (res < 0) { + if (!copied) { + return res; +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 46a0111cbd..f0df821c6d 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -739,10 +739,14 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * @param req The request this copy is part of + * @param dst destination buffer vector + * @param src source buffer vector ++ * @param dropped_cap_fsetid Caller has dropped CAP_FSETID. If work is handed ++ * over to a different thread/process, CAP_FSETID needs to be dropped ++ * there as well. + * @return actual number of bytes copied or -errno on error + */ + ssize_t fuse_buf_copy(fuse_req_t req, +- struct fuse_bufvec *dst, struct fuse_bufvec *src); ++ struct fuse_bufvec *dst, struct fuse_bufvec *src, ++ bool dropped_cap_fsetid); + + /** + * Memory buffer iterator +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 6c910d4cc0..5681477e9e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -2031,9 +2031,13 @@ int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, + * @param src The source (memory) buffer + * @param src_off The GPA + * @param len Length in bytes ++ * @param dropped_cap_fsetid Caller dropped CAP_FSETID. If it is being handed ++ * over to different thread/process, CAP_FSETID needs to be dropped ++ * before write. + */ + ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, + size_t dst_off, const struct fuse_buf *src, +- size_t src_off, size_t len); ++ size_t src_off, size_t len, ++ bool dropped_cap_fsetid); + + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index b0f9d06c9e..99d4244fa3 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -1247,7 +1247,7 @@ int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, + */ + ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, + size_t dst_off, const struct fuse_buf *src, +- size_t src_off, size_t len) ++ size_t src_off, size_t len, bool dropped_cap_fsetid) + { + VhostUserFSSlaveMsg *msg = g_malloc0(sizeof(VhostUserFSSlaveMsg) + + sizeof(VhostUserFSSlaveMsgEntry)); +@@ -1267,6 +1267,9 @@ ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, + msg->entries[0].c_offset = (uintptr_t)src->mem + src_off; + msg->entries[0].len = len; + msg->entries[0].flags = VHOST_USER_FS_FLAG_MAP_W; ++ if (dropped_cap_fsetid) { ++ msg->hdr.flags |= VHOST_USER_FS_GENFLAG_DROP_FSETID; ++ } + + int64_t result = fuse_virtio_io(req->se, msg, dst->fd); + fuse_log(FUSE_LOG_DEBUG, "%s: result=%" PRId64 "\n", __func__, result); +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f614171c90..b7c1fa71b5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2384,7 +2384,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + } + } + +- res = fuse_buf_copy(req, &out_buf, in_buf); ++ res = fuse_buf_copy(req, &out_buf, in_buf, fi->kill_priv); + if (res < 0) { + fuse_reply_err(req, -res); + } else { +-- +2.31.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0029-virtiofsd-Add-printf-checking-to-fuse_log.patch b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0025-virtiofsd-Add-printf-checking-to-fuse_log.patch similarity index 87% rename from tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0029-virtiofsd-Add-printf-checking-to-fuse_log.patch rename to tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0025-virtiofsd-Add-printf-checking-to-fuse_log.patch index 5b593014e1..b6f63a1eae 100644 --- a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0029-virtiofsd-Add-printf-checking-to-fuse_log.patch +++ b/tools/packaging/qemu/patches/tag_patches/7a800cf9496fddddf71b21a00991e0ec757a170a/0025-virtiofsd-Add-printf-checking-to-fuse_log.patch @@ -1,7 +1,7 @@ -From e2a3c273639368221dae39a7f230a46d0a580e4d Mon Sep 17 00:00:00 2001 +From 49a2448ac10e3c827d5f6d75394de7e9c8d07477 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 21 Jan 2020 10:20:14 +0000 -Subject: [PATCH 29/29] virtiofsd: Add printf checking to fuse_log +Subject: [PATCH 25/25] virtiofsd: Add printf checking to fuse_log Use qemu's GCC_FMT_ATTR to add printf style checking to fuse_log. @@ -31,5 +31,5 @@ index 8d7091bd4d..5c2df71603 100644 #endif /* FUSE_LOG_H_ */ -- -2.25.1 +2.31.1 diff --git a/tools/packaging/qemu/patches/tag_patches/v6.1.0/no_patches.txt b/tools/packaging/qemu/patches/tag_patches/v6.1.0/no_patches.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/packaging/scripts/apply_patches.sh b/tools/packaging/scripts/apply_patches.sh index 98d5b02816..28bf2f8fe9 100755 --- a/tools/packaging/scripts/apply_patches.sh +++ b/tools/packaging/scripts/apply_patches.sh @@ -45,6 +45,6 @@ if [ -d "$patches_dir" ]; then done else echo "INFO: Patches directory does not exist: ${patches_dir}" - echo "INFO: Create a ${patches_dir}/no_patches file if the current version has no patches" + echo "INFO: Create a ${patches_dir}/no_patches.txt file if the current version has no patches" exit 1; fi diff --git a/versions.yaml b/versions.yaml index ebf143f185..8204ebaf47 100644 --- a/versions.yaml +++ b/versions.yaml @@ -88,8 +88,8 @@ assets: qemu: description: "VMM that uses KVM" url: "https://github.com/qemu/qemu" - version: "v5.2.0" - tag: "v5.2.0" + version: "v6.1.0" + tag: "v6.1.0" # Do not include any non-full release versions # Break the line *without CR or space being appended*, to appease # yamllint, and note the deliberate ' ' at the end of the expression. @@ -108,7 +108,7 @@ assets: qemu-experimental: description: "QEMU with virtiofs support" url: "https://github.com/qemu/qemu" - version: "470dd6bd360782f5137f7e3376af6a44658eb1d3" + version: "7a800cf9496fddddf71b21a00991e0ec757a170a" image: description: |