diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch new file mode 100644 index 0000000000..d3f2a1674b --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch @@ -0,0 +1,270 @@ +From d14a6cb000d0a5f9e382e5e5de0021756034d0cb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 2 May 2019 18:04:04 +0100 +Subject: [PATCH 01/29] DAX: libvhost-user: Allow popping a queue element with + bad pointers + +Allow a daemon implemented with libvhost-user to accept an +element with pointers to memory that aren't in the mapping table. +The daemon might have some special way to deal with some special +cases of this. + +The default behaviour doesn't change. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + contrib/vhost-user-blk/vhost-user-blk.c | 3 +- + contrib/vhost-user-gpu/vhost-user-gpu.c | 5 ++- + contrib/vhost-user-input/main.c | 4 +- + contrib/vhost-user-scsi/vhost-user-scsi.c | 2 +- + subprojects/libvhost-user/libvhost-user.c | 51 ++++++++++++++++++----- + subprojects/libvhost-user/libvhost-user.h | 8 +++- + tests/vhost-user-bridge.c | 4 +- + tools/virtiofsd/fuse_virtio.c | 3 +- + 8 files changed, 59 insertions(+), 21 deletions(-) + +diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c +index d14b2896bf..01193552e9 100644 +--- a/contrib/vhost-user-blk/vhost-user-blk.c ++++ b/contrib/vhost-user-blk/vhost-user-blk.c +@@ -235,7 +235,8 @@ static int vub_virtio_process_req(VubDev *vdev_blk, + unsigned out_num; + VubReq *req; + +- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq)); ++ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq), ++ NULL, NULL); + if (!elem) { + return -1; + } +diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c +index f445ef28ec..58161a4378 100644 +--- a/contrib/vhost-user-gpu/vhost-user-gpu.c ++++ b/contrib/vhost-user-gpu/vhost-user-gpu.c +@@ -819,7 +819,8 @@ vg_handle_ctrl(VuDev *dev, int qidx) + return; + } + +- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); ++ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command), ++ NULL, NULL); + if (!cmd) { + break; + } +@@ -922,7 +923,7 @@ vg_handle_cursor(VuDev *dev, int qidx) + struct virtio_gpu_update_cursor cursor; + + for (;;) { +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL); + if (!elem) { + break; + } +diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c +index c15d18c33f..d5c435605c 100644 +--- a/contrib/vhost-user-input/main.c ++++ b/contrib/vhost-user-input/main.c +@@ -57,7 +57,7 @@ static void 
vi_input_send(VuInput *vi, struct virtio_input_event *event) + + /* ... then check available space ... */ + for (i = 0; i < vi->qindex; i++) { +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL); + if (!elem) { + while (--i >= 0) { + vu_queue_unpop(dev, vq, vi->queue[i].elem, 0); +@@ -141,7 +141,7 @@ static void vi_handle_sts(VuDev *dev, int qidx) + g_debug("%s", G_STRFUNC); + + for (;;) { +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL); + if (!elem) { + break; + } +diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c +index 4f6e3e2a24..7564d6ab2d 100644 +--- a/contrib/vhost-user-scsi/vhost-user-scsi.c ++++ b/contrib/vhost-user-scsi/vhost-user-scsi.c +@@ -252,7 +252,7 @@ static void vus_proc_req(VuDev *vu_dev, int idx) + VirtIOSCSICmdReq *req; + VirtIOSCSICmdResp *rsp; + +- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement)); ++ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement), NULL, NULL); + if (!elem) { + g_debug("No more elements pending on vq[%d]@%p", idx, vq); + break; +diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c +index fab7ca17ee..3b1b5c385f 100644 +--- a/subprojects/libvhost-user/libvhost-user.c ++++ b/subprojects/libvhost-user/libvhost-user.c +@@ -2461,7 +2461,8 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable) + + static bool + virtqueue_map_desc(VuDev *dev, +- unsigned int *p_num_sg, struct iovec *iov, ++ unsigned int *p_num_sg, unsigned int *p_bad_sg, ++ struct iovec *iov, + unsigned int max_num_sg, bool is_write, + uint64_t pa, size_t sz) + { +@@ -2482,10 +2483,35 @@ virtqueue_map_desc(VuDev *dev, + return false; + } + +- iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa); +- if (iov[num_sg].iov_base == NULL) { +- vu_panic(dev, "virtio: invalid address for buffers"); +- return 
false; ++ if (p_bad_sg && *p_bad_sg) { ++ /* A previous mapping was bad, we won't try and map this either */ ++ *p_bad_sg = *p_bad_sg + 1; ++ } ++ if (!p_bad_sg || !*p_bad_sg) { ++ /* No bad mappings so far, lets try mapping this one */ ++ iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa); ++ if (iov[num_sg].iov_base == NULL) { ++ /* ++ * OK, it won't map, either panic or if the caller can handle ++ * it, then count it. ++ */ ++ if (!p_bad_sg) { ++ vu_panic(dev, "virtio: invalid address for buffers"); ++ return false; ++ } else { ++ *p_bad_sg = *p_bad_sg + 1; ++ } ++ } ++ } ++ if (p_bad_sg && *p_bad_sg) { ++ /* ++ * There was a bad mapping, either now or previously, since ++ * the caller set p_bad_sg it means it's prepared to deal with ++ * it, so give it the pa in the iov ++ * Note: In this case len will be the whole sz, so we won't ++ * go around again for this descriptor ++ */ ++ iov[num_sg].iov_base = (void *)(uintptr_t)pa; + } + iov[num_sg].iov_len = len; + num_sg++; +@@ -2516,7 +2542,8 @@ virtqueue_alloc_element(size_t sz, + } + + static void * +-vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) ++vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz, ++ unsigned int *p_bad_in, unsigned int *p_bad_out) + { + struct vring_desc *desc = vq->vring.desc; + uint64_t desc_addr, read_len; +@@ -2560,7 +2587,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) + /* Collect all the descriptors */ + do { + if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) { +- if (!virtqueue_map_desc(dev, &in_num, iov + out_num, ++ if (!virtqueue_map_desc(dev, &in_num, p_bad_in, iov + out_num, + VIRTQUEUE_MAX_SIZE - out_num, true, + le64toh(desc[i].addr), + le32toh(desc[i].len))) { +@@ -2571,7 +2598,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) + vu_panic(dev, "Incorrect order for descriptors"); + return NULL; + } +- if (!virtqueue_map_desc(dev, &out_num, iov, ++ if (!virtqueue_map_desc(dev, 
&out_num, p_bad_out, iov, + VIRTQUEUE_MAX_SIZE, false, + le64toh(desc[i].addr), + le32toh(desc[i].len))) { +@@ -2661,7 +2688,8 @@ vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx) + } + + void * +-vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) ++vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz, ++ unsigned int *p_bad_in, unsigned int *p_bad_out) + { + int i; + unsigned int head; +@@ -2674,7 +2702,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) + + if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) { + i = (--vq->resubmit_num); +- elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz); ++ elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz, ++ p_bad_in, p_bad_out); + + if (!vq->resubmit_num) { + free(vq->resubmit_list); +@@ -2706,7 +2735,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) + vring_set_avail_event(vq, vq->last_avail_idx); + } + +- elem = vu_queue_map_desc(dev, vq, head, sz); ++ elem = vu_queue_map_desc(dev, vq, head, sz, p_bad_in, p_bad_out); + + if (!elem) { + return NULL; +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index 7d47f1364a..f0aca2b216 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -589,11 +589,17 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq); + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @sz: the size of struct to return (must be >= VuVirtqElement) ++ * @p_bad_in: If non-NULL, a pointer to an integer count of ++ * unmappable regions in input descriptors ++ * @p_bad_out: If non-NULL, a pointer to an integer count of ++ * unmappable regions in output descriptors ++ * + * + * Returns: a VuVirtqElement filled from the queue or NULL. The + * returned element must be free()-d by the caller.
+ */ +-void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz); ++void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz, ++ unsigned int *p_bad_in, unsigned int *p_bad_out); + + + /** +diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c +index 24815920b2..4f6829e6c3 100644 +--- a/tests/vhost-user-bridge.c ++++ b/tests/vhost-user-bridge.c +@@ -184,7 +184,7 @@ vubr_handle_tx(VuDev *dev, int qidx) + unsigned int out_num; + struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg; + +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL); + if (!elem) { + break; + } +@@ -299,7 +299,7 @@ vubr_backend_recv_cb(int sock, void *ctx) + ssize_t ret, total = 0; + unsigned int num; + +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL); + if (!elem) { + break; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index ddcefee427..bd19358437 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -657,7 +657,8 @@ static void *fv_queue_thread(void *opaque) + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + + while (1) { +- FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); ++ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest), ++ NULL, NULL); + if (!req) { + break; + } +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-virtiofsd-Allow-to-build-it-without-the-tools.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-virtiofsd-Allow-to-build-it-without-the-tools.patch new file mode 100644 index 0000000000..c0036d0079 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0001-virtiofsd-Allow-to-build-it-without-the-tools.patch @@ -0,0 +1,36 @@ +From da5d60ab13c9e31f775b34d7afe6d82fca7f2336 Mon Sep 17 00:00:00 
2001 +From: Wainer dos Santos Moschetta +Date: Tue, 2 Feb 2021 13:46:24 -0500 +Subject: [PATCH] virtiofsd: Allow to build it without the tools + +This changed the Meson build script to allow virtiofsd to be built even +though the tools build is disabled, thus honoring the --enable-virtiofsd +option. + +(Backport of commit xxxxxx) +Signed-off-by: Wainer dos Santos Moschetta +--- + tools/meson.build | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/tools/meson.build b/tools/meson.build +index fdce66857d..3e5a0abfa2 100644 +--- a/tools/meson.build ++++ b/tools/meson.build +@@ -10,8 +10,11 @@ if get_option('virtiofsd').enabled() + error('virtiofsd requires Linux') + elif not seccomp.found() or not libcap_ng.found() + error('virtiofsd requires libcap-ng-devel and seccomp-devel') +- elif not have_tools or 'CONFIG_VHOST_USER' not in config_host +- error('virtiofsd needs tools and vhost-user support') ++ elif 'CONFIG_VHOST_USER' not in config_host ++ error('virtiofsd needs vhost-user support') ++ else ++ # Disabled all the tools but virtiofsd.
++ have_virtiofsd = true + endif + endif + elif get_option('virtiofsd').disabled() or not have_system +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0002-virtiofsd-add-security-guide-document.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0002-virtiofsd-add-security-guide-document.patch new file mode 100644 index 0000000000..c5893642a1 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0002-virtiofsd-add-security-guide-document.patch @@ -0,0 +1,155 @@ +From bb506adc3bc3e3c0cad695b3bab126afdc3f0536 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 16 May 2019 15:11:35 +0100 +Subject: [PATCH 02/29] virtiofsd: add security guide document +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Many people want to know: what's up with virtiofsd and security? This +document provides the answers! + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +--- + docs/tools/index.rst | 1 + + docs/tools/virtiofsd-security.rst | 118 ++++++++++++++++++++++++++++++ + 2 files changed, 119 insertions(+) + create mode 100644 docs/tools/virtiofsd-security.rst + +diff --git a/docs/tools/index.rst b/docs/tools/index.rst +index 3a5829c17a..d5b65f803b 100644 +--- a/docs/tools/index.rst ++++ b/docs/tools/index.rst +@@ -17,3 +17,4 @@ Contents: + qemu-trace-stap + virtfs-proxy-helper + virtiofsd ++ virtiofsd-security +diff --git a/docs/tools/virtiofsd-security.rst b/docs/tools/virtiofsd-security.rst +new file mode 100644 +index 0000000000..61ce551344 +--- /dev/null ++++ b/docs/tools/virtiofsd-security.rst +@@ -0,0 +1,118 @@ ++======================== ++Virtiofsd Security Guide ++======================== ++ ++Introduction ++============ ++This document covers security topics for users of virtiofsd, the daemon that ++implements host<->guest file system sharing. 
Sharing files between one or more ++guests and the host raises questions about the trust relationships between ++these entities. By understanding these topics users can safely deploy ++virtiofsd and control access to their data. ++ ++Architecture ++============ ++The virtiofsd daemon process acts as a vhost-user device backend, implementing ++the virtio-fs device that the corresponding device driver inside the guest ++interacts with. ++ ++There is one virtiofsd process per virtio-fs device instance. For example, ++when two guests have access to the same shared directory there are still two ++virtiofsd processes since there are two virtio-fs device instances. Similarly, ++if one guest has access to two shared directories, there are two virtiofsd ++processes since there are two virtio-fs device instances. ++ ++Files are created on the host with uid/gid values provided by the guest. ++Furthermore, virtiofsd is unable to enforce file permissions since guests have ++the ability to access any file within the shared directory. File permissions ++are implemented in the guest, just like with traditional local file systems. ++ ++Security Requirements ++===================== ++Guests have root access to the shared directory. This is necessary for root ++file systems on virtio-fs and similar use cases. ++ ++When multiple guests have access to the same shared directory, the guests have ++a trust relationship. A broken or malicious guest could delete or corrupt ++files. It could exploit symlink or time-of-check-to-time-of-use (TOCTOU) race ++conditions against applications in other guests. It could plant device nodes ++or setuid executables to gain privileges in other guests. It could perform ++denial-of-service (DoS) attacks by consuming available space or making the file ++system unavailable to other guests. ++ ++Guests are restricted to the shared directory and cannot access other files on ++the host. 
++ ++Guests should not be able to gain arbitrary code execution inside the virtiofsd ++process. If they do, the process is sandboxed to prevent escaping into other ++parts of the host. ++ ++Daemon Sandboxing ++================= ++The virtiofsd process handles virtio-fs FUSE requests from the untrusted guest. ++This attack surface could give the guest access to host resources and must ++therefore be protected. Sandboxing mechanisms are integrated into virtiofsd to ++reduce the impact in the event that an attacker gains control of the process. ++ ++As a general rule, virtiofsd does not trust inputs from the guest, aside from ++uid/gid values. Input validation is performed so that the guest cannot corrupt ++memory or otherwise gain arbitrary code execution in the virtiofsd process. ++ ++Sandboxing adds restrictions on the virtiofsd so that even if an attacker is ++able to exploit a bug, they will be constrained to the virtiofsd process and ++unable to cause damage on the host. ++ ++Seccomp Whitelist ++----------------- ++Many system calls are not required by virtiofsd to perform its function. For ++example, ptrace(2) and execve(2) are not necessary and attackers are likely to ++use them to further compromise the system. This is prevented using a seccomp ++whitelist in virtiofsd. ++ ++During startup virtiofsd installs a whitelist of allowed system calls. All ++other system calls are forbidden for the remaining lifetime of the process. ++This list has been built through experience of running virtiofsd on several ++flavors of Linux and observing which system calls were encountered. ++ ++It is possible that previously unexplored code paths or newer library versions ++will invoke system calls that have not been whitelisted yet. In this case the ++process terminates and a seccomp error is captured in the audit log. The log ++can typically be viewed using ``journalctl -xe`` and searching for ``SECCOMP``. 
++ ++Should it be necessary to extend the whitelist, system call numbers from the ++audit log can be translated to names through a CPU architecture-specific ++``.tbl`` file in the Linux source tree. They can then be added to the ++whitelist in ``seccomp.c`` in the virtiofsd source tree. ++ ++Mount Namespace ++--------------- ++During startup virtiofsd enters a new mount namespace and releases all mounts ++except for the shared directory. This makes the file system root `/` the ++shared directory. It is impossible to access files outside the shared ++directory since they cannot be looked up by path resolution. ++ ++Several attacks, including `..` traversal and symlink escapes, are prevented by ++the mount namespace. ++ ++The current virtiofsd implementation keeps a directory file descriptor to ++/proc/self/fd open in order to implement several FUSE requests. This file ++descriptor could be used by attackers to access files outside the shared ++directory. This limitation will be addressed in a future release of virtiofsd. ++ ++Other Namespaces ++---------------- ++Virtiofsd enters new pid and network namespaces during startup. The pid ++namespace prevents the process from seeing other processes running on the host. ++The network namespace removes network connectivity from the process. ++ ++Deployment Best Practices ++========================= ++The shared directory should be a separate file system so that untrusted guests ++cannot cause a denial-of-service by using up all available inodes or exhausting ++free space. ++ ++If the shared directory is also accessible from a host mount namespace, it is ++recommended to keep a parent directory with rwx------ permissions so that other ++users on the host are unable to access any setuid executables or device nodes ++in the shared directory. The `nosuid` and `nodev` mount options can also be ++used to prevent this issue. 
+-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0003-DAX-contrib-libvhost-user-Add-virtio-fs-slave-types.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0003-DAX-contrib-libvhost-user-Add-virtio-fs-slave-types.patch new file mode 100644 index 0000000000..7afe567577 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0003-DAX-contrib-libvhost-user-Add-virtio-fs-slave-types.patch @@ -0,0 +1,110 @@ +From 800ce0d08e09320ac2f1bd9125cb07d14a2689fe Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 7 Feb 2019 18:39:31 +0000 +Subject: [PATCH 03/29] DAX contrib/libvhost-user: Add virtio-fs slave types + +Add virtio-fs definitions to libvhost-user + +Signed-off-by: Dr. David Alan Gilbert +--- + subprojects/libvhost-user/libvhost-user.c | 32 +++++++++++++++++++++++ + subprojects/libvhost-user/libvhost-user.h | 31 ++++++++++++++++++++++ + 2 files changed, 63 insertions(+) + +diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c +index 3b1b5c385f..9b8223b5d5 100644 +--- a/subprojects/libvhost-user/libvhost-user.c ++++ b/subprojects/libvhost-user/libvhost-user.c +@@ -2910,3 +2910,35 @@ vu_queue_push(VuDev *dev, VuVirtq *vq, + vu_queue_flush(dev, vq, 1); + vu_queue_inflight_post_put(dev, vq, elem->index); + } ++ ++bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, ++ VhostUserFSSlaveMsg *fsm) ++{ ++ int fd_num = 0; ++ VhostUserMsg vmsg = { ++ .request = req, ++ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, ++ .size = sizeof(vmsg.payload.fs), ++ .payload.fs = *fsm, ++ }; ++ ++ if (fd != -1) { ++ vmsg.fds[fd_num++] = fd; ++ } ++ ++ vmsg.fd_num = fd_num; ++ ++ if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { ++ return false; ++ } ++ ++ pthread_mutex_lock(&dev->slave_mutex); ++ if (!vu_message_write(dev, 
dev->slave_fd, &vmsg)) { ++ pthread_mutex_unlock(&dev->slave_mutex); ++ return false; ++ } ++ ++ /* Also unlocks the slave_mutex */ ++ return vu_process_message_reply(dev, &vmsg); ++} ++ +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index f0aca2b216..f3b0998eea 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -122,6 +122,24 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + ++/* Structures carried over the slave channel back to QEMU */ ++#define VHOST_USER_FS_SLAVE_ENTRIES 8 ++ ++/* For the flags field of VhostUserFSSlaveMsg */ ++#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0) ++#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1) ++ ++typedef struct { ++ /* Offsets within the file being mapped */ ++ uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES]; ++ /* Offsets within the cache */ ++ uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES]; ++ /* Lengths of sections */ ++ uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES]; ++ /* Flags, from VHOST_USER_FS_FLAG_* */ ++ uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES]; ++} VhostUserFSSlaveMsg; ++ + typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; +@@ -197,6 +215,7 @@ typedef struct VhostUserMsg { + VhostUserConfig config; + VhostUserVringArea area; + VhostUserInflight inflight; ++ VhostUserFSSlaveMsg fs; + } payload; + + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; +@@ -693,4 +712,16 @@ void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, + bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, + unsigned int out_bytes); + ++/** ++ * vu_fs_cache_request: Send a slave message for an fs client ++ * @dev: a VuDev context ++ * @req: The request type (map, unmap, sync) ++ * @fd: an fd (only required for map, else must be -1) ++ * @fsm: The body of the message ++ * ++ * Returns: true if the reply was 0 ++ */ ++bool 
vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, ++ VhostUserFSSlaveMsg *fsm); ++ + #endif /* LIBVHOST_USER_H */ +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0004-DAX-virtio-Add-shared-memory-capability.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0004-DAX-virtio-Add-shared-memory-capability.patch new file mode 100644 index 0000000000..86e14b1132 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0004-DAX-virtio-Add-shared-memory-capability.patch @@ -0,0 +1,65 @@ +From 71c89288b97c92ecb3a67ca8aa73619719dcfe9e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 27 Jul 2018 12:38:03 +0100 +Subject: [PATCH 04/29] DAX: virtio: Add shared memory capability + +Define a new capability type 'VIRTIO_PCI_CAP_SHARED_MEMORY_CFG' +and the data structure 'virtio_pci_cap64' to go with it. +They allow defining shared memory regions with sizes and offsets +of 2^32 and more. +Multiple instances of the capability are allowed and distinguished +by the 'id' field in the base capability. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/virtio/virtio-pci.c | 20 ++++++++++++++++++++ + hw/virtio/virtio-pci.h | 4 ++++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index f863f69ede..f17ea5a6e8 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1136,6 +1136,26 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, + return offset; + } + ++int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, ++ uint8_t bar, uint64_t offset, uint64_t length, ++ uint8_t id) ++{ ++ struct virtio_pci_cap64 cap = { ++ .cap.cap_len = sizeof cap, ++ .cap.cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG, ++ }; ++ uint32_t mask32 = ~0; ++ ++ cap.cap.bar = bar; ++ cap.cap.id = id; ++ cap.cap.length = cpu_to_le32(length & mask32); ++ cap.length_hi = cpu_to_le32((length >> 32) & mask32); ++ cap.cap.offset = cpu_to_le32(offset & mask32); ++ cap.offset_hi = cpu_to_le32((offset >> 32) & mask32); ++ ++ return virtio_pci_add_mem_cap(proxy, &cap.cap); ++} ++ + static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, + unsigned size) + { +diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h +index d7d5d403a9..31ca339099 100644 +--- a/hw/virtio/virtio-pci.h ++++ b/hw/virtio/virtio-pci.h +@@ -247,4 +247,8 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + */ + unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); + ++int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, ++ uint8_t bar, uint64_t offset, uint64_t length, ++ uint8_t id); ++ + #endif +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0005-DAX-virtio-fs-Add-cache-BAR.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0005-DAX-virtio-fs-Add-cache-BAR.patch new file mode 100644 index 0000000000..4d03f5b033 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0005-DAX-virtio-fs-Add-cache-BAR.patch 
@@ -0,0 +1,171 @@ +From 3996e9086ddd591494f9cb7f0eb7048a1b52200c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Jul 2018 16:33:52 +0100 +Subject: [PATCH 05/29] DAX: virtio-fs: Add cache BAR + +Add a cache BAR into which files will be directly mapped. +The size can be set with the cache-size= property, e.g. + -device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=16G + +The default is no cache. + +Signed-off-by: Dr. David Alan Gilbert +with PPC fixes by: +Signed-off-by: Fabiano Rosas +--- + hw/virtio/vhost-user-fs-pci.c | 23 ++++++++++++++++ + hw/virtio/vhost-user-fs.c | 32 ++++++++++++++++++++++ + include/hw/virtio/vhost-user-fs.h | 2 ++ + include/standard-headers/linux/virtio_fs.h | 2 ++ + 4 files changed, 59 insertions(+) + +diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c +index 8bb389bd28..19aaa8d722 100644 +--- a/hw/virtio/vhost-user-fs-pci.c ++++ b/hw/virtio/vhost-user-fs-pci.c +@@ -16,10 +16,12 @@ + #include "hw/virtio/vhost-user-fs.h" + #include "virtio-pci.h" + #include "qom/object.h" ++#include "standard-headers/linux/virtio_fs.h" + + struct VHostUserFSPCI { + VirtIOPCIProxy parent_obj; + VHostUserFS vdev; ++ MemoryRegion cachebar; + }; + + typedef struct VHostUserFSPCI VHostUserFSPCI; +@@ -39,6 +41,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + { + VHostUserFSPCI *dev = VHOST_USER_FS_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); ++ uint64_t cachesize; + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + /* Also reserve config change and hiprio queue vectors */ +@@ -46,6 +49,26 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + } + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); ++ cachesize = dev->vdev.conf.cache_size; ++ ++ /* ++ * The bar starts with the data/DAX cache ++ * Others will be added later. 
++ */ ++ memory_region_init(&dev->cachebar, OBJECT(vpci_dev), ++ "vhost-fs-pci-cachebar", cachesize); ++ if (cachesize) { ++ memory_region_add_subregion(&dev->cachebar, 0, &dev->vdev.cache); ++ virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, 0, cachesize, ++ VIRTIO_FS_SHMCAP_ID_CACHE); ++ } ++ ++ /* After 'realized' so the memory region exists */ ++ pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR, ++ PCI_BASE_ADDRESS_SPACE_MEMORY | ++ PCI_BASE_ADDRESS_MEM_PREFETCH | ++ PCI_BASE_ADDRESS_MEM_TYPE_64, ++ &dev->cachebar); + } + + static void vhost_user_fs_pci_class_init(ObjectClass *klass, void *data) +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index ed036ad9c1..d111bf2af3 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -23,6 +23,16 @@ + #include "hw/virtio/vhost-user-fs.h" + #include "monitor/monitor.h" + ++/* ++ * The powerpc kernel code expects the memory to be accessible during ++ * addition/removal. ++ */ ++#if defined(TARGET_PPC64) && defined(CONFIG_LINUX) ++#define DAX_WINDOW_PROT PROT_READ ++#else ++#define DAX_WINDOW_PROT PROT_NONE ++#endif ++ + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); +@@ -162,6 +172,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserFS *fs = VHOST_USER_FS(dev); ++ void *cache_ptr; + unsigned int i; + size_t len; + int ret; +@@ -201,6 +212,26 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) + VIRTQUEUE_MAX_SIZE); + return; + } ++ if (fs->conf.cache_size && ++ (!is_power_of_2(fs->conf.cache_size) || ++ fs->conf.cache_size < sysconf(_SC_PAGESIZE))) { ++ error_setg(errp, "cache-size property must be a power of 2 " ++ "no smaller than the page size"); ++ return; ++ } ++ if (fs->conf.cache_size) { ++ /* Anonymous, private memory is not counted as overcommit */ ++ cache_ptr = mmap(NULL, fs->conf.cache_size, 
DAX_WINDOW_PROT, ++ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ++ if (cache_ptr == MAP_FAILED) { ++ error_setg(errp, "Unable to mmap blank cache"); ++ return; ++ } ++ ++ memory_region_init_ram_ptr(&fs->cache, OBJECT(vdev), ++ "virtio-fs-cache", ++ fs->conf.cache_size, cache_ptr); ++ } + + if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) { + return; +@@ -276,6 +307,7 @@ static Property vuf_properties[] = { + DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, + conf.num_request_queues, 1), + DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), ++ DEFINE_PROP_SIZE("cache-size", VHostUserFS, conf.cache_size, 0), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 6985752771..df6bf2a926 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -28,6 +28,7 @@ typedef struct { + char *tag; + uint16_t num_request_queues; + uint16_t queue_size; ++ uint64_t cache_size; + } VHostUserFSConf; + + struct VHostUserFS { +@@ -41,6 +42,7 @@ struct VHostUserFS { + VirtQueue *hiprio_vq; + + /*< public >*/ ++ MemoryRegion cache; + }; + + #endif /* _QEMU_VHOST_USER_FS_H */ +diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h +index a32fe8a64c..808aa3a402 100644 +--- a/include/standard-headers/linux/virtio_fs.h ++++ b/include/standard-headers/linux/virtio_fs.h +@@ -19,4 +19,6 @@ struct virtio_fs_config { + /* For the id field in virtio_pci_shm_cap */ + #define VIRTIO_FS_SHMCAP_ID_CACHE 0 + ++#define VIRTIO_FS_PCI_CACHE_BAR 2 ++ + #endif /* _LINUX_VIRTIO_FS_H */ +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0006-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0006-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch new file mode 100644 index 
0000000000..e60b5a9d1f --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0006-DAX-virtio-fs-Add-vhost-user-slave-commands-for-mapp.patch @@ -0,0 +1,191 @@ +From 27ccc5e4aecbffd590199bae897a8359889fd54d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 4 Jul 2018 18:51:42 +0100 +Subject: [PATCH 06/29] DAX: virtio-fs: Add vhost-user slave commands for + mapping + +The daemon may request that fd's be mapped into the virtio-fs cache +visible to the guest. +These mappings are triggered by commands sent over the slave fd +from the daemon. + +Signed-off-by: Dr. David Alan Gilbert +--- + docs/interop/vhost-user.rst | 23 ++++++++++++++++++++++ + hw/virtio/vhost-user-fs.c | 19 ++++++++++++++++++ + hw/virtio/vhost-user.c | 18 +++++++++++++++++ + include/hw/virtio/vhost-user-fs.h | 24 +++++++++++++++++++++++ + subprojects/libvhost-user/libvhost-user.h | 3 +++ + 5 files changed, 87 insertions(+) + +diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst +index d6085f7045..056f94c6fb 100644 +--- a/docs/interop/vhost-user.rst ++++ b/docs/interop/vhost-user.rst +@@ -1432,6 +1432,29 @@ Slave message types + + The state.num field is currently reserved and must be set to 0. + ++``VHOST_USER_SLAVE_FS_MAP`` ++ :id: 6 ++ :equivalent ioctl: N/A ++ :slave payload: fd + n * (offset + address + len) ++ :master payload: N/A ++ ++ Requests that the QEMU mmap the given fd into the virtio-fs cache; ++ multiple chunks can be mapped in one command. ++ A reply is generated indicating whether mapping succeeded. ++ ++``VHOST_USER_SLAVE_FS_UNMAP`` ++ :id: 7 ++ :equivalent ioctl: N/A ++ :slave payload: n * (address + len) ++ :master payload: N/A ++ ++ Requests that the QEMU un-mmap the given range in the virtio-fs cache; ++ multiple chunks can be unmapped in one command. ++ A reply is generated indicating whether unmapping succeeded. ++ ++``VHOST_USER_SLAVE_FS_SYNC`` ++ [Semantic details TBD] ++ + .. 
_reply_ack: + + VHOST_USER_PROTOCOL_F_REPLY_ACK +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index d111bf2af3..9c35fdbeab 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -33,6 +33,25 @@ + #define DAX_WINDOW_PROT PROT_NONE + #endif + ++int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, ++ int fd) ++{ ++ /* TODO */ ++ return -1; ++} ++ ++int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) ++{ ++ /* TODO */ ++ return -1; ++} ++ ++int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) ++{ ++ /* TODO */ ++ return -1; ++} ++ + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 2fdd5daf74..757dee0d1e 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -12,6 +12,7 @@ + #include "qapi/error.h" + #include "hw/virtio/vhost.h" + #include "hw/virtio/vhost-user.h" ++#include "hw/virtio/vhost-user-fs.h" + #include "hw/virtio/vhost-backend.h" + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-net.h" +@@ -132,6 +133,11 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, ++ VHOST_USER_SLAVE_VRING_CALL = 4, ++ VHOST_USER_SLAVE_VRING_ERR = 5, ++ VHOST_USER_SLAVE_FS_MAP = 6, ++ VHOST_USER_SLAVE_FS_UNMAP = 7, ++ VHOST_USER_SLAVE_FS_SYNC = 8, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +@@ -218,6 +224,7 @@ typedef union { + VhostUserCryptoSession session; + VhostUserVringArea area; + VhostUserInflight inflight; ++ VhostUserFSSlaveMsg fs; + } VhostUserPayload; + + typedef struct VhostUserMsg { +@@ -1470,6 +1477,17 @@ static void slave_read(void *opaque) + ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, + fd[0]); + break; ++#ifdef CONFIG_VHOST_USER_FS ++ case 
VHOST_USER_SLAVE_FS_MAP: ++ ret = vhost_user_fs_slave_map(dev, &payload.fs, fd[0]); ++ break; ++ case VHOST_USER_SLAVE_FS_UNMAP: ++ ret = vhost_user_fs_slave_unmap(dev, &payload.fs); ++ break; ++ case VHOST_USER_SLAVE_FS_SYNC: ++ ret = vhost_user_fs_slave_sync(dev, &payload.fs); ++ break; ++#endif + default: + error_report("Received unexpected msg type: %d.", hdr.request); + ret = -EINVAL; +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index df6bf2a926..69cc6340ed 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -23,6 +23,24 @@ + #define TYPE_VHOST_USER_FS "vhost-user-fs-device" + OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS) + ++/* Structures carried over the slave channel back to QEMU */ ++#define VHOST_USER_FS_SLAVE_ENTRIES 8 ++ ++/* For the flags field of VhostUserFSSlaveMsg */ ++#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0) ++#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1) ++ ++typedef struct { ++ /* Offsets within the file being mapped */ ++ uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES]; ++ /* Offsets within the cache */ ++ uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES]; ++ /* Lengths of sections */ ++ uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES]; ++ /* Flags, from VHOST_USER_FS_FLAG_* */ ++ uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES]; ++} VhostUserFSSlaveMsg; ++ + typedef struct { + CharBackend chardev; + char *tag; +@@ -45,4 +63,10 @@ struct VHostUserFS { + MemoryRegion cache; + }; + ++/* Callbacks from the vhost-user code for slave commands */ ++int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, ++ int fd); ++int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); ++int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); ++ + #endif /* _QEMU_VHOST_USER_FS_H */ +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index f3b0998eea..c63a590069 100644 +--- 
a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -119,6 +119,9 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, + VHOST_USER_SLAVE_VRING_ERR = 5, ++ VHOST_USER_SLAVE_FS_MAP = 6, ++ VHOST_USER_SLAVE_FS_UNMAP = 7, ++ VHOST_USER_SLAVE_FS_SYNC = 8, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch new file mode 100644 index 0000000000..8f75dd706e --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch @@ -0,0 +1,98 @@ +From 3de89ce9fb5eda46f7cefd70e9090cb7cd7ec803 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Wed, 28 Mar 2018 20:14:53 +0800 +Subject: [PATCH 1/2] 9p: removing coroutines of 9p to increase the I/O + performance + +This is a quick workaround, need to be fixed. 
+ +Signed-off-by: Chao Peng +--- + hw/9pfs/9p.c | 12 +++++------- + hw/9pfs/9p.h | 6 +++--- + hw/9pfs/coth.h | 3 +++ + 3 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c +index 9e046f7acb..11c8ee08d9 100644 +--- a/hw/9pfs/9p.c ++++ b/hw/9pfs/9p.c +@@ -1082,10 +1082,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) + out_notify: + pdu->s->transport->push_and_notify(pdu); + +- /* Now wakeup anybody waiting in flush for this request */ +- if (!qemu_co_queue_next(&pdu->complete)) { +- pdu_free(pdu); +- } ++ pdu_free(pdu); + } + + static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) +@@ -3997,7 +3994,7 @@ static inline bool is_read_only_op(V9fsPDU *pdu) + + void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) + { +- Coroutine *co; ++// Coroutine *co; + CoroutineEntry *handler; + V9fsState *s = pdu->s; + +@@ -4015,8 +4012,9 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) + } + + qemu_co_queue_init(&pdu->complete); +- co = qemu_coroutine_create(handler, pdu); +- qemu_coroutine_enter(co); ++ handler(pdu); ++ //co = qemu_coroutine_create(handler, pdu); ++ //qemu_coroutine_enter(co); + } + + /* Returns 0 on success, 1 on failure. 
*/ +diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h +index b8f72a3bd9..d16bf9d05e 100644 +--- a/hw/9pfs/9p.h ++++ b/hw/9pfs/9p.h +@@ -391,21 +391,21 @@ extern int total_open_fd; + static inline void v9fs_path_write_lock(V9fsState *s) + { + if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { +- qemu_co_rwlock_wrlock(&s->rename_lock); ++ // qemu_co_rwlock_wrlock(&s->rename_lock); + } + } + + static inline void v9fs_path_read_lock(V9fsState *s) + { + if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { +- qemu_co_rwlock_rdlock(&s->rename_lock); ++ // qemu_co_rwlock_rdlock(&s->rename_lock); + } + } + + static inline void v9fs_path_unlock(V9fsState *s) + { + if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { +- qemu_co_rwlock_unlock(&s->rename_lock); ++ // qemu_co_rwlock_unlock(&s->rename_lock); + } + } + +diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h +index c2cdc7a9ea..0fe971d1f5 100644 +--- a/hw/9pfs/coth.h ++++ b/hw/9pfs/coth.h +@@ -46,6 +46,9 @@ + qemu_coroutine_yield(); \ + } while (0) + ++#undef v9fs_co_run_in_worker ++#define v9fs_co_run_in_worker(code_block) do {code_block} while(0); ++ + void co_run_in_worker_bh(void *); + int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *); + int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **); +-- +2.21.0 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch new file mode 100644 index 0000000000..0bb6e10a90 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0007-DAX-virtio-fs-Fill-in-slave-commands-for-mapping.patch @@ -0,0 +1,196 @@ +From a0d09868a25b9b15b8ef49402b035597ef889f85 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Wed, 4 Jul 2018 20:01:51 +0100 +Subject: [PATCH 07/29] DAX: virtio-fs: Fill in slave commands for mapping + +Fill in definitions for map, unmap and sync commands. + +Signed-off-by: Dr. David Alan Gilbert +with fix by misono.tomohiro@fujitsu.com +--- + hw/virtio/vhost-user-fs.c | 161 ++++++++++++++++++++++++++++++++++++-- + 1 file changed, 155 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index 9c35fdbeab..98cec993f7 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -36,20 +36,169 @@ + int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + int fd) + { +- /* TODO */ +- return -1; ++ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); ++ if (!fs) { ++ /* Shouldn't happen - but seen on error path */ ++ fprintf(stderr, "%s: Bad fs ptr\n", __func__); ++ return -1; ++ } ++ size_t cache_size = fs->conf.cache_size; ++ if (!cache_size) { ++ fprintf(stderr, "%s: map when DAX cache not present\n", __func__); ++ return -1; ++ } ++ void *cache_host = memory_region_get_ram_ptr(&fs->cache); ++ ++ unsigned int i; ++ int res = 0; ++ ++ if (fd < 0) { ++ fprintf(stderr, "%s: Bad fd for map\n", __func__); ++ return -1; ++ } ++ ++ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { ++ if (sm->len[i] == 0) { ++ continue; ++ } ++ ++ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] || ++ (sm->c_offset[i] + sm->len[i]) > cache_size) { ++ fprintf(stderr, "%s: Bad offset/len for map [%d] %" ++ PRIx64 "+%" PRIx64 "\n", __func__, ++ i, sm->c_offset[i], sm->len[i]); ++ res = -1; ++ break; ++ } ++ ++ if (mmap(cache_host + sm->c_offset[i], sm->len[i], ++ ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_R) ? PROT_READ : 0) | ++ ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W) ? 
PROT_WRITE : 0), ++ MAP_SHARED | MAP_FIXED, ++ fd, sm->fd_offset[i]) != (cache_host + sm->c_offset[i])) { ++ fprintf(stderr, "%s: map failed err %d [%d] %" ++ PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, ++ errno, i, sm->c_offset[i], sm->len[i], ++ sm->fd_offset[i]); ++ res = -1; ++ break; ++ } ++ } ++ ++ if (res) { ++ /* Something went wrong, unmap them all */ ++ vhost_user_fs_slave_unmap(dev, sm); ++ } ++ return res; + } + + int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) + { +- /* TODO */ +- return -1; ++ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); ++ if (!fs) { ++ /* Shouldn't happen - but seen on error path */ ++ fprintf(stderr, "%s: Bad fs ptr\n", __func__); ++ return -1; ++ } ++ size_t cache_size = fs->conf.cache_size; ++ if (!cache_size) { ++ /* ++ * Since dax cache is disabled, there should be no unmap request. ++ * Howerver we still receives whole range unmap request during umount ++ * for cleanup. Ignore it. ++ */ ++ if (sm->len[0] == ~(uint64_t)0) { ++ return 0; ++ } ++ ++ fprintf(stderr, "%s: unmap when DAX cache not present\n", __func__); ++ return -1; ++ } ++ void *cache_host = memory_region_get_ram_ptr(&fs->cache); ++ ++ unsigned int i; ++ int res = 0; ++ ++ /* ++ * Note even if one unmap fails we try the rest, since the effect ++ * is to clean up as much as possible. 
++ */ ++ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { ++ void *ptr; ++ if (sm->len[i] == 0) { ++ continue; ++ } ++ ++ if (sm->len[i] == ~(uint64_t)0) { ++ /* Special case meaning the whole arena */ ++ sm->len[i] = cache_size; ++ } ++ ++ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] || ++ (sm->c_offset[i] + sm->len[i]) > cache_size) { ++ fprintf(stderr, "%s: Bad offset/len for unmap [%d] %" ++ PRIx64 "+%" PRIx64 "\n", __func__, ++ i, sm->c_offset[i], sm->len[i]); ++ res = -1; ++ continue; ++ } ++ ++ ptr = mmap(cache_host + sm->c_offset[i], sm->len[i], DAX_WINDOW_PROT, ++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); ++ if (ptr != (cache_host + sm->c_offset[i])) { ++ fprintf(stderr, "%s: mmap failed (%s) [%d] %" ++ PRIx64 "+%" PRIx64 " from %" PRIx64 " res: %p\n", ++ __func__, ++ strerror(errno), ++ i, sm->c_offset[i], sm->len[i], ++ sm->fd_offset[i], ptr); ++ res = -1; ++ } ++ } ++ ++ return res; + } + + int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) + { +- /* TODO */ +- return -1; ++ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); ++ size_t cache_size = fs->conf.cache_size; ++ if (!cache_size) { ++ fprintf(stderr, "%s: sync when DAX cache not present\n", __func__); ++ return -1; ++ } ++ void *cache_host = memory_region_get_ram_ptr(&fs->cache); ++ ++ unsigned int i; ++ int res = 0; ++ ++ /* Note even if one sync fails we try the rest */ ++ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { ++ if (sm->len[i] == 0) { ++ continue; ++ } ++ ++ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] || ++ (sm->c_offset[i] + sm->len[i]) > cache_size) { ++ fprintf(stderr, "%s: Bad offset/len for sync [%d] %" ++ PRIx64 "+%" PRIx64 "\n", __func__, ++ i, sm->c_offset[i], sm->len[i]); ++ res = -1; ++ continue; ++ } ++ ++ if (msync(cache_host + sm->c_offset[i], sm->len[i], ++ MS_SYNC /* ?? 
*/)) { ++ fprintf(stderr, "%s: msync failed (%s) [%d] %" ++ PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, ++ strerror(errno), ++ i, sm->c_offset[i], sm->len[i], ++ sm->fd_offset[i]); ++ res = -1; ++ } ++ } ++ ++ return res; + } + + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0008-DAX-virtiofsd-Add-cache-accessor-functions.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0008-DAX-virtiofsd-Add-cache-accessor-functions.patch new file mode 100644 index 0000000000..1765a2dd4f --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0008-DAX-virtiofsd-Add-cache-accessor-functions.patch @@ -0,0 +1,99 @@ +From b341b9541023b0a9f0a315ef24e81522b273e552 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 5 Jul 2018 18:20:34 +0100 +Subject: [PATCH 08/29] DAX: virtiofsd Add cache accessor functions + +Add low level functions that the clients can use to map/unmap/sync cache +areas. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + tools/virtiofsd/fuse_lowlevel.h | 31 +++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 58 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 0e10a14bc9..b0d111bcb2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -29,6 +29,8 @@ + #include + #include + ++#include "subprojects/libvhost-user/libvhost-user.h" ++ + /* + * Miscellaneous definitions + */ +@@ -1970,4 +1972,33 @@ void fuse_session_process_buf(struct fuse_session *se, + */ + int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + ++/** ++ * For use with virtio-fs; request an fd be mapped into the cache ++ * ++ * @param req The request that triggered this action ++ * @param msg A set of mapping requests ++ * @param fd The fd to map ++ * @return Zero on success ++ */ ++int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); ++ ++/** ++ * For use with virtio-fs; request unmapping of part of the cache ++ * ++ * @param se The session this request is on ++ * @param msg A set of unmapping requests ++ * @return Zero on success ++ */ ++int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); ++ ++/** ++ * For use with virtio-fs; request synchronisation of part of the cache ++ * [Semantics TBD] ++ * ++ * @param req The request that triggered this action ++ * @param msg A set of syncing requests ++ * @return Zero on success ++ */ ++int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); ++ + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index bd19358437..24d9323665 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -1044,3 +1044,30 @@ void virtio_session_close(struct fuse_session *se) + free(se->virtio_dev); + se->virtio_dev = NULL; + } ++ ++int fuse_virtio_map(fuse_req_t req, 
VhostUserFSSlaveMsg *msg, int fd) ++{ ++ if (!req->se->virtio_dev) { ++ return -ENODEV; ++ } ++ return !vu_fs_cache_request(&req->se->virtio_dev->dev, ++ VHOST_USER_SLAVE_FS_MAP, fd, msg); ++} ++ ++int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) ++{ ++ if (!se->virtio_dev) { ++ return -ENODEV; ++ } ++ return !vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, ++ -1, msg); ++} ++ ++int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) ++{ ++ if (!req->se->virtio_dev) { ++ return -ENODEV; ++ } ++ return !vu_fs_cache_request(&req->se->virtio_dev->dev, ++ VHOST_USER_SLAVE_FS_SYNC, -1, msg); ++} +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0009-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0009-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch new file mode 100644 index 0000000000..8b21a9e58e --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0009-DAX-virtiofsd-Add-setup-remove-mappings-fuse-command.patch @@ -0,0 +1,152 @@ +From c3273cefbec6f5637189ad1cb9a8b7722cc01294 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 6 Jul 2018 18:03:49 +0100 +Subject: [PATCH 09/29] DAX: virtiofsd: Add setup/remove mappings fuse commands + +Add commands so that the guest kernel can ask the daemon to map file +sections into a guest kernel visible cache. + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Vivek Goyal +Signed-off-by: Peng Tao +--- + tools/virtiofsd/fuse_lowlevel.c | 67 +++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_lowlevel.h | 23 ++++++++++- + 2 files changed, 89 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index e94b71110b..1c3790130a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1868,6 +1868,71 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, + } + } + ++static void do_setupmapping(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) ++{ ++ struct fuse_setupmapping_in *arg; ++ struct fuse_file_info fi; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ /* ++ * TODO: Need to come up with a better definition of flags here; it can't ++ * be the kernel view of the flags, since that's abstracted from the client ++ * similarly, it's not the vhost-user set ++ * for now just use O_ flags ++ */ ++ uint64_t genflags; ++ ++ genflags = O_RDONLY; ++ if (arg->flags & FUSE_SETUPMAPPING_FLAG_WRITE) { ++ genflags = O_RDWR; ++ } ++ ++ if (req->se->op.setupmapping) { ++ req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len, ++ arg->moffset, genflags, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_removemapping(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) ++{ ++ struct fuse_removemapping_in *arg; ++ struct fuse_removemapping_one *one; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ one = fuse_mbuf_iter_advance(iter, sizeof(*one)); ++ if (!one) { ++ fuse_log( ++ FUSE_LOG_ERR, ++ "do_removemapping: invalid in, expected %d * %ld, has %ld - %ld\n", ++ arg->count, sizeof(*one), iter->size, iter->pos); ++ fuse_reply_err(req, EINVAL); 
++ return; ++ } ++ ++ if (req->se->op.removemapping) { ++ req->se->op.removemapping(req, req->se, nodeid, arg->count, one); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ + static void do_init(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_mbuf_iter *iter) + { +@@ -2258,6 +2323,8 @@ static struct { + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, + [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, + [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [FUSE_SETUPMAPPING] = { do_setupmapping, "SETUPMAPPING" }, ++ [FUSE_REMOVEMAPPING] = { do_removemapping, "REMOVEMAPPING" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index b0d111bcb2..2851840cc2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -24,6 +24,7 @@ + #endif + + #include "fuse_common.h" ++#include "standard-headers/linux/fuse.h" + + #include + #include +@@ -1170,7 +1171,6 @@ struct fuse_lowlevel_ops { + */ + void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info *fi); +- + /** + * Copy a range of data from one file to another + * +@@ -1226,6 +1226,27 @@ struct fuse_lowlevel_ops { + */ + void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + struct fuse_file_info *fi); ++ ++ /* ++ * Map file sections into kernel visible cache ++ * ++ * Map a section of the file into address space visible to the kernel ++ * mounting the filesystem. 
++ * TODO ++ */ ++ void (*setupmapping)(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, ++ uint64_t len, uint64_t moffset, uint64_t flags, ++ struct fuse_file_info *fi); ++ ++ /* ++ * Unmap file sections in kernel visible cache ++ * ++ * Unmap sections previously mapped by setupmapping ++ * TODO ++ */ ++ void (*removemapping)(fuse_req_t req, struct fuse_session *se, ++ fuse_ino_t ino, unsigned num, ++ struct fuse_removemapping_one *argp); + }; + + /** +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0010-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0010-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch new file mode 100644 index 0000000000..66837dbf60 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0010-DAX-virtiofsd-Add-setup-remove-mapping-handlers-to-p.patch @@ -0,0 +1,50 @@ +From 7029506e6b23fc15f2b7c4a6a62aa3a0ee58fb02 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 6 Jul 2018 19:52:49 +0100 +Subject: [PATCH 10/29] DAX: virtiofsd: Add setup/remove mapping handlers to + passthrough_ll + +Signed-off-by: Dr. 
David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5fb36d9407..784bdcff34 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2891,6 +2891,22 @@ static void lo_destroy(void *userdata) + pthread_mutex_unlock(&lo->mutex); + } + ++static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, ++ uint64_t len, uint64_t moffset, uint64_t flags, ++ struct fuse_file_info *fi) ++{ ++ // TODO ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void lo_removemapping(fuse_req_t req, struct fuse_session *se, ++ fuse_ino_t ino, unsigned num, ++ struct fuse_removemapping_one *argp) ++{ ++ // TODO ++ fuse_reply_err(req, ENOSYS); ++} ++ + static struct fuse_lowlevel_ops lo_oper = { + .init = lo_init, + .lookup = lo_lookup, +@@ -2932,6 +2948,8 @@ static struct fuse_lowlevel_ops lo_oper = { + #endif + .lseek = lo_lseek, + .destroy = lo_destroy, ++ .setupmapping = lo_setupmapping, ++ .removemapping = lo_removemapping, + }; + + /* Print vhost-user.json backend program capabilities */ +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0011-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0011-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch new file mode 100644 index 0000000000..55833230aa --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0011-DAX-virtiofsd-Wire-up-passthrough_ll-s-lo_setupmappi.patch @@ -0,0 +1,53 @@ +From 15fb0e84e38c2681e855e69b58414ba831b399bf Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 9 Jul 2018 19:57:16 +0100 +Subject: [PATCH 11/29] DAX: virtiofsd: Wire up passthrough_ll's + lo_setupmapping + +Wire up passthrough_ll's setupmapping to allocate, send to virtio +and then reply OK. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +--- + tools/virtiofsd/passthrough_ll.c | 24 ++++++++++++++++++++++-- + 1 file changed, 22 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 784bdcff34..b57cb4079e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2895,8 +2895,28 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, + uint64_t len, uint64_t moffset, uint64_t flags, + struct fuse_file_info *fi) + { +- // TODO +- fuse_reply_err(req, ENOSYS); ++ int ret = 0; ++ VhostUserFSSlaveMsg msg = { 0 }; ++ uint64_t vhu_flags; ++ bool writable = flags & O_RDWR; ++ ++ vhu_flags = VHOST_USER_FS_FLAG_MAP_R; ++ if (writable) { ++ vhu_flags |= VHOST_USER_FS_FLAG_MAP_W; ++ } ++ ++ msg.fd_offset[0] = foffset; ++ msg.len[0] = len; ++ msg.c_offset[0] = moffset; ++ msg.flags[0] = vhu_flags; ++ ++ if (fuse_virtio_map(req, &msg, lo_fi_fd(req, fi))) { ++ fprintf(stderr, "%s: map over virtio failed (fd=%d)\n", __func__, ++ (int)fi->fh); ++ ret = EINVAL; ++ } ++ ++ fuse_reply_err(req, ret); + } + + static void lo_removemapping(fuse_req_t req, struct fuse_session *se, +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0012-DAX-virtiofsd-Make-lo_removemapping-work.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0012-DAX-virtiofsd-Make-lo_removemapping-work.patch new file mode 100644 index 0000000000..59281423f3 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0012-DAX-virtiofsd-Make-lo_removemapping-work.patch @@ -0,0 +1,43 @@ +From 
17cf13d652885b2c3a09fbbab1cb503f53c27d96 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Mon, 13 Aug 2018 11:52:43 -0400 +Subject: [PATCH 12/29] DAX: virtiofsd: Make lo_removemapping() work + +Let guest pass in the offset in dax window a mapping is currently +mapped at and needs to be removed. + +Signed-off-by: Vivek Goyal +--- + tools/virtiofsd/passthrough_ll.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b57cb4079e..056b395574 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2923,8 +2923,20 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se, + fuse_ino_t ino, unsigned num, + struct fuse_removemapping_one *argp) + { +- // TODO +- fuse_reply_err(req, ENOSYS); ++ VhostUserFSSlaveMsg msg = { 0 }; ++ int ret = 0; ++ ++ msg.len[0] = argp->len; ++ msg.c_offset[0] = argp->moffset; ++ if (fuse_virtio_unmap(se, &msg)) { ++ fprintf(stderr, ++ "%s: unmap over virtio failed " ++ "(offset=0x%lx, len=0x%lx)\n", ++ __func__, argp->moffset, argp->len); ++ ret = EINVAL; ++ } ++ ++ fuse_reply_err(req, ret); + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0013-DAX-virtiofsd-Make-setupmapping-work-only-with-inode.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0013-DAX-virtiofsd-Make-setupmapping-work-only-with-inode.patch new file mode 100644 index 0000000000..27308cc641 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0013-DAX-virtiofsd-Make-setupmapping-work-only-with-inode.patch @@ -0,0 +1,104 @@ +From a3f692a36307054148e7db640dc7a64158a98250 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Thu, 30 Aug 2018 14:22:10 -0400 +Subject: [PATCH 13/29] DAX: virtiofsd: Make setupmapping work only 
with inode + +Guest might not pass file pointer. In that case using inode info, open +the file again, mmap() and close fd. + +Signed-off-by: Vivek Goyal +With fix from: +Signed-off-by: Fotis Xenakis +--- + tools/virtiofsd/fuse_lowlevel.c | 13 ++++++++++-- + tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++++++++---- + 2 files changed, 43 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 1c3790130a..4cfd4c3547 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1897,8 +1897,17 @@ static void do_setupmapping(fuse_req_t req, fuse_ino_t nodeid, + } + + if (req->se->op.setupmapping) { +- req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len, +- arg->moffset, genflags, &fi); ++ /* ++ * TODO: Add a flag to request which tells if arg->fh is ++ * valid or not. ++ */ ++ if (fi.fh == (uint64_t)-1) { ++ req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len, ++ arg->moffset, genflags, NULL); ++ } else { ++ req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len, ++ arg->moffset, genflags, &fi); ++ } + } else { + fuse_reply_err(req, ENOSYS); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 056b395574..ebd5a9b215 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2895,11 +2895,19 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, + uint64_t len, uint64_t moffset, uint64_t flags, + struct fuse_file_info *fi) + { +- int ret = 0; ++ struct lo_data *lo = lo_data(req); ++ int ret = 0, fd, res; + VhostUserFSSlaveMsg msg = { 0 }; + uint64_t vhu_flags; ++ char *buf; + bool writable = flags & O_RDWR; + ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setupmapping(ino=%" PRIu64 ", fi=0x%p," ++ " foffset=%" PRIu64 ", len=%" PRIu64 ", moffset=%" PRIu64 ++ ", flags=%" PRIu64 ")\n", ++ ino, (void *)fi, foffset, len, moffset, flags); ++ + vhu_flags = 
VHOST_USER_FS_FLAG_MAP_R; + if (writable) { + vhu_flags |= VHOST_USER_FS_FLAG_MAP_W; +@@ -2910,12 +2918,32 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset, + msg.c_offset[0] = moffset; + msg.flags[0] = vhu_flags; + +- if (fuse_virtio_map(req, &msg, lo_fi_fd(req, fi))) { +- fprintf(stderr, "%s: map over virtio failed (fd=%d)\n", __func__, +- (int)fi->fh); ++ if (fi) { ++ fd = lo_fi_fd(req, fi); ++ } else { ++ res = asprintf(&buf, "%i", lo_fd(req, ino)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fd = openat(lo->proc_self_fd, buf, flags); ++ free(buf); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ } ++ ++ if (fuse_virtio_map(req, &msg, fd)) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: map over virtio failed (ino=%" PRId64 ++ "fd=%d moffset=0x%" PRIx64 ")\n", ++ __func__, ino, fi ? (int)fi->fh : lo_fd(req, ino), moffset); + ret = EINVAL; + } + ++ if (!fi) { ++ close(fd); ++ } + fuse_reply_err(req, ret); + } + +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch new file mode 100644 index 0000000000..1e2aa5c58f --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0014-DAX-virtiofsd-route-se-down-to-destroy-method.patch @@ -0,0 +1,75 @@ +From 7c14a24ad467b9404b95345c64e8c5ef5e6d209c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 30 Nov 2018 11:47:36 +0000 +Subject: [PATCH 14/29] DAX: virtiofsd: route se down to destroy method + +We're going to need to pass the session down to destroy so that it can +pass it back to do the remove mapping. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + tools/virtiofsd/fuse_lowlevel.c | 6 +++--- + tools/virtiofsd/fuse_lowlevel.h | 2 +- + tools/virtiofsd/passthrough_ll.c | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 4cfd4c3547..a2480d4aa1 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2211,7 +2211,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + se->got_destroy = 1; + se->got_init = 0; + if (se->op.destroy) { +- se->op.destroy(se->userdata); ++ se->op.destroy(se->userdata, se); + } + + send_reply_ok(req, NULL, 0); +@@ -2438,7 +2438,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + se->got_destroy = 1; + se->got_init = 0; + if (se->op.destroy) { +- se->op.destroy(se->userdata); ++ se->op.destroy(se->userdata, se); + } + } else { + goto reply_err; +@@ -2526,7 +2526,7 @@ void fuse_session_destroy(struct fuse_session *se) + { + if (se->got_init && !se->got_destroy) { + if (se->op.destroy) { +- se->op.destroy(se->userdata); ++ se->op.destroy(se->userdata, se); + } + } + pthread_rwlock_destroy(&se->init_rwlock); +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 2851840cc2..2259623776 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -208,7 +208,7 @@ struct fuse_lowlevel_ops { + * + * @param userdata the user data passed to fuse_session_new() + */ +- void (*destroy)(void *userdata); ++ void (*destroy)(void *userdata, struct fuse_session *se); + + /** + * Look up a directory entry by name and get its attributes. 
+diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ebd5a9b215..0d3cda8d2f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2871,7 +2871,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + } + } + +-static void lo_destroy(void *userdata) ++static void lo_destroy(void *userdata, struct fuse_session *se) + { + struct lo_data *lo = (struct lo_data *)userdata; + +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch new file mode 100644 index 0000000000..e08fb14857 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0015-DAX-virtiofsd-Perform-an-unmap-on-destroy.patch @@ -0,0 +1,37 @@ +From 72bccc497aeb9057e36477c327e0ac58bc154e6f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 30 Nov 2018 11:50:25 +0000 +Subject: [PATCH 15/29] DAX: virtiofsd: Perform an unmap on destroy + +Force unmap all remaining dax cache entries on a destroy. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + tools/virtiofsd/passthrough_ll.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0d3cda8d2f..56a4b9404a 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2875,6 +2875,17 @@ static void lo_destroy(void *userdata, struct fuse_session *se) + { + struct lo_data *lo = (struct lo_data *)userdata; + ++ if (fuse_lowlevel_is_virtio(se)) { ++ VhostUserFSSlaveMsg msg = { 0 }; ++ ++ msg.len[0] = ~(uint64_t)0; /* Special: means 'all' */ ++ msg.c_offset[0] = 0; ++ if (fuse_virtio_unmap(se, &msg)) { ++ fuse_log(FUSE_LOG_ERR, "%s: unmap during destroy failed\n", ++ __func__); ++ } ++ } ++ + pthread_mutex_lock(&lo->mutex); + while (true) { + GHashTableIter iter; +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0016-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0016-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch new file mode 100644 index 0000000000..80d60b8a05 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0016-DAX-libvhost-user-Allow-popping-a-queue-element-with.patch @@ -0,0 +1,34 @@ +From c05795e129152533d66f131dd019ae903d1eb39a Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 2 May 2019 18:04:04 +0100 +Subject: [PATCH 16/29] DAX: libvhost-user: Allow popping a queue element with + bad pointers + +Allow a daemon implemented with libvhost-user to accept an +element with pointers to memory that aren't in the mapping table. +The daemon might have some special way to deal with some special +cases of this. + +The default behaviour doesn't change. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + block/export/vhost-user-blk-server.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index ab2c4d44c4..ea2d302e33 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -205,7 +205,7 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) + while (1) { + VuBlkReq *req; + +- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq)); ++ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq), NULL, NULL); + if (!req) { + break; + } +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0017-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0017-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch new file mode 100644 index 0000000000..3b843ae172 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0017-DAX-unmap-virtiofsd-Add-VHOST_USER_SLAVE_FS_IO.patch @@ -0,0 +1,211 @@ +From a238faf5a53668aac037f7ce026d1bf785ee4186 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 11:54:02 +0100 +Subject: [PATCH 17/29] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO + +Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a +client to ask qemu to perform a read/write from an fd directly +to GPA. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + docs/interop/vhost-user.rst | 11 +++ + hw/virtio/trace-events | 6 ++ + hw/virtio/vhost-user-fs.c | 87 +++++++++++++++++++++++ + hw/virtio/vhost-user.c | 4 ++ + include/hw/virtio/vhost-user-fs.h | 1 + + subprojects/libvhost-user/libvhost-user.h | 1 + + 6 files changed, 110 insertions(+) + +diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst +index 056f94c6fb..8d6ec92881 100644 +--- a/docs/interop/vhost-user.rst ++++ b/docs/interop/vhost-user.rst +@@ -1455,6 +1455,17 @@ Slave message types + ``VHOST_USER_SLAVE_FS_SYNC`` + [Semantic details TBD] + ++``VHOST_USER_SLAVE_FS_IO`` ++ :id: 9 ++ :equivalent ioctl: N/A ++ :slave payload: fd + n * (offset + address + len) ++ :master payload: N/A ++ ++ Requests that the QEMU performs IO directly from an fd to guest memory ++ on behalf of the daemon; this is normally for a case where a memory region ++ isn't visible to the daemon. ++ [Semantic details TBD] ++ + .. _reply_ack: + + VHOST_USER_PROTOCOL_F_REPLY_ACK +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 2060a144a2..a35adf5caf 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_set_owner(void *dev) "dev: %p" + vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 + ++# vhost-user-fs.c ++ ++vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd" ++vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd" ++vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd" ++ + # virtio.c + virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u 
out_num %u" + virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index 98cec993f7..82a32492a7 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -22,6 +22,8 @@ + #include "qemu/error-report.h" + #include "hw/virtio/vhost-user-fs.h" + #include "monitor/monitor.h" ++#include "exec/address-spaces.h" ++#include "trace.h" + + /* + * The powerpc kernel code expects the memory to be accessible during +@@ -201,6 +203,91 @@ int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) + return res; + } + ++int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, ++ int fd) ++{ ++ VHostUserFS *fs = VHOST_USER_FS(dev->vdev); ++ if (!fs) { ++ /* Shouldn't happen - but seen it in error paths */ ++ fprintf(stderr, "%s: Bad fs ptr\n", __func__); ++ return -1; ++ } ++ ++ unsigned int i; ++ int res = 0; ++ size_t done = 0; ++ ++ if (fd < 0) { ++ fprintf(stderr, "%s: Bad fd for map\n", __func__); ++ return -1; ++ } ++ ++ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES && !res; i++) { ++ if (sm->len[i] == 0) { ++ continue; ++ } ++ ++ size_t len = sm->len[i]; ++ hwaddr gpa = sm->c_offset[i]; ++ ++ while (len && !res) { ++ MemoryRegionSection mrs = memory_region_find(get_system_memory(), ++ gpa, len); ++ size_t mrs_size = (size_t)int128_get64(mrs.size); ++ ++ if (!mrs_size) { ++ fprintf(stderr, ++ "%s: No guest region found for 0x%" HWADDR_PRIx "\n", ++ __func__, gpa); ++ res = -EFAULT; ++ break; ++ } ++ ++ trace_vhost_user_fs_slave_io_loop(mrs.mr->name, ++ (uint64_t)mrs.offset_within_region, ++ memory_region_is_ram(mrs.mr), ++ memory_region_is_romd(mrs.mr), ++ (size_t)mrs_size); ++ ++ void *hostptr = qemu_map_ram_ptr(mrs.mr->ram_block, ++ mrs.offset_within_region); ++ ssize_t transferred; ++ if (sm->flags[i] & VHOST_USER_FS_FLAG_MAP_R) { ++ /* Read from file into RAM */ ++ if (mrs.mr->readonly) { ++ res = 
-EFAULT; ++ break; ++ } ++ transferred = pread(fd, hostptr, mrs_size, sm->fd_offset[i]); ++ } else { ++ /* Write into file from RAM */ ++ assert((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W)); ++ transferred = pwrite(fd, hostptr, mrs_size, sm->fd_offset[i]); ++ } ++ trace_vhost_user_fs_slave_io_loop_res(transferred); ++ if (transferred < 0) { ++ res = -errno; ++ break; ++ } ++ if (!transferred) { ++ /* EOF */ ++ break; ++ } ++ ++ done += transferred; ++ len -= transferred; ++ } ++ } ++ close(fd); ++ ++ trace_vhost_user_fs_slave_io_exit(res, done); ++ /* ++ * TODO! We should be returning 'done' if possible but our error handling ++ * doesn't know about that yet. ++ */ ++ return res; ++} ++ + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 757dee0d1e..b4ef0102ad 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_FS_MAP = 6, + VHOST_USER_SLAVE_FS_UNMAP = 7, + VHOST_USER_SLAVE_FS_SYNC = 8, ++ VHOST_USER_SLAVE_FS_IO = 9, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +@@ -1487,6 +1488,9 @@ static void slave_read(void *opaque) + case VHOST_USER_SLAVE_FS_SYNC: + ret = vhost_user_fs_slave_sync(dev, &payload.fs); + break; ++ case VHOST_USER_SLAVE_FS_IO: ++ ret = vhost_user_fs_slave_io(dev, &payload.fs, fd[0]); ++ break; + #endif + default: + error_report("Received unexpected msg type: %d.", hdr.request); +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 69cc6340ed..0750687463 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -68,5 +68,6 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + int fd); + int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); + int vhost_user_fs_slave_sync(struct vhost_dev *dev, 
VhostUserFSSlaveMsg *sm); ++int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, int fd); + + #endif /* _QEMU_VHOST_USER_FS_H */ +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index c63a590069..4b6e681a3e 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -122,6 +122,7 @@ typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_FS_MAP = 6, + VHOST_USER_SLAVE_FS_UNMAP = 7, + VHOST_USER_SLAVE_FS_SYNC = 8, ++ VHOST_USER_SLAVE_FS_IO = 9, + VHOST_USER_SLAVE_MAX + } VhostUserSlaveRequest; + +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0018-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0018-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch new file mode 100644 index 0000000000..23b31b1791 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0018-DAX-unmap-virtiofsd-Add-wrappers-for-VHOST_USER_SLAV.patch @@ -0,0 +1,98 @@ +From 5e3aff71f01f41254cdc7ecefc98a31be002dda0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 12:17:36 +0100 +Subject: [PATCH 18/29] DAX/unmap virtiofsd: Add wrappers for + VHOST_USER_SLAVE_FS_IO + +Add a wrapper to send VHOST_USER_SLAVE_FS_IO commands and a +further wrapper for sending a fuse_buf write using the FS_IO +slave command. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + tools/virtiofsd/fuse_lowlevel.h | 24 +++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.c | 38 +++++++++++++++++++++++++++++++++ + 2 files changed, 62 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 2259623776..866d122352 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -2022,4 +2022,28 @@ int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); + */ + int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); + ++/** ++ * For use with virtio-fs; request IO directly to memory ++ * ++ * @param se The current session ++ * @param msg A set of IO requests ++ * @param fd The fd to map ++ * @return Zero on success ++ */ ++int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd); ++ ++/** ++ * For use with virtio-fs; wrapper for fuse_virtio_io for writes ++ * from memory to an fd ++ * @param req The request that triggered this action ++ * @param dst The destination (file) memory buffer ++ * @param dst_off Byte offset in the file ++ * @param src The source (memory) buffer ++ * @param src_off The GPA ++ * @param len Length in bytes ++ */ ++ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, ++ size_t dst_off, const struct fuse_buf *src, ++ size_t src_off, size_t len); ++ + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 24d9323665..abac0d0d2e 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -1071,3 +1071,41 @@ int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) + return !vu_fs_cache_request(&req->se->virtio_dev->dev, + VHOST_USER_SLAVE_FS_SYNC, -1, msg); + } ++ ++int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd) ++{ ++ if (!se->virtio_dev) { ++ return -ENODEV; ++ } ++ return !vu_fs_cache_request(&se->virtio_dev->dev, ++ VHOST_USER_SLAVE_FS_IO, fd, msg); ++} 
++ ++/* ++ * Write to a file (dst) from an area of guest GPA (src) that probably ++ * isn't visible to the daemon. ++ */ ++ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, ++ size_t dst_off, const struct fuse_buf *src, ++ size_t src_off, size_t len) ++{ ++ VhostUserFSSlaveMsg msg = { 0 }; ++ ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ msg.fd_offset[0] = dst->pos + dst_off; ++ } else { ++ off_t cur = lseek(dst->fd, 0, SEEK_CUR); ++ if (cur == (off_t)-1) { ++ return -errno; ++ } ++ msg.fd_offset[0] = cur; ++ } ++ msg.c_offset[0] = (uintptr_t)src->mem + src_off; ++ msg.len[0] = len; ++ msg.flags[0] = VHOST_USER_FS_FLAG_MAP_W; ++ ++ bool result = !fuse_virtio_io(req->se, &msg, dst->fd); ++ /* TODO: Rework the result path to actually get length/error */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: result=%d\n", __func__, result); ++ return result ? len : -EIO; ++} +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0019-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0019-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch new file mode 100644 index 0000000000..5649391a30 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0019-DAX-unmap-virtiofsd-Parse-unmappable-elements.patch @@ -0,0 +1,335 @@ +From 1586d4a5525f44c51cbcbd5004b9a79bfc8c495c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 13:26:09 +0100 +Subject: [PATCH 19/29] DAX/unmap virtiofsd: Parse unmappable elements + +For some read/writes the virtio queue elements are unmappable by +the daemon; these are cases where the data is to be read/written +from non-RAM. In viritofs's case this is typically a direct read/write +into an mmap'd DAX file also on virtiofs (possibly on another instance). 
+ +When we receive a virtio queue element, check that we have enough +mappable data to handle the headers. Make a note of the number of +unmappable 'in' entries (ie. for read data back to the VMM), +and flag the fuse_bufvec for 'out' entries with a new flag +FUSE_BUF_PHYS_ADDR. + +Signed-off-by: Dr. David Alan Gilbert +with fix by: +Signed-off-by: Liu Bo +--- + tools/virtiofsd/buffer.c | 4 +- + tools/virtiofsd/fuse_common.h | 7 ++ + tools/virtiofsd/fuse_virtio.c | 191 ++++++++++++++++++++++++---------- + 3 files changed, 145 insertions(+), 57 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 874f01c488..1a050aa441 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -77,6 +77,7 @@ static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, + ssize_t res = 0; + size_t copied = 0; + ++ assert(!(src->flags & FUSE_BUF_PHYS_ADDR)); + while (len) { + if (dst->flags & FUSE_BUF_FD_SEEK) { + res = pwrite(dst->fd, (char *)src->mem + src_off, len, +@@ -272,7 +273,8 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + * process + */ + for (i = 0; i < srcv->count; i++) { +- if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { ++ if ((srcv->buf[i].flags & FUSE_BUF_PHYS_ADDR) || ++ (srcv->buf[i].flags & FUSE_BUF_IS_FD)) { + break; + } + } +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index a090040bb2..ed9280de91 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -611,6 +611,13 @@ enum fuse_buf_flags { + * detected. + */ + FUSE_BUF_FD_RETRY = (1 << 3), ++ ++ /** ++ * The addresses in the iovec represent guest physical addresses ++ * that can't be mapped by the daemon process. ++ * IO must be bounced back to the VMM to do it. 
++ */ ++ FUSE_BUF_PHYS_ADDR = (1 << 4), + }; + + /** +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index abac0d0d2e..31f17ab043 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -49,6 +49,10 @@ typedef struct { + VuVirtqElement elem; + struct fuse_chan ch; + ++ /* Number of unmappable iovecs */ ++ unsigned bad_in_num; ++ unsigned bad_out_num; ++ + /* Used to complete requests that involve no reply */ + bool reply_sent; + } FVRequest; +@@ -291,8 +295,10 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; ++ unsigned int bad_in_num = req->bad_in_num; + struct iovec *in_sg = elem->in_sg; + size_t in_len = iov_size(in_sg, in_num); ++ size_t in_len_writeable = iov_size(in_sg, in_num - bad_in_num); + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", + __func__, elem->index, in_num, in_len); + +@@ -300,7 +306,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + * The elem should have room for a 'fuse_out_header' (out from fuse) + * plus the data based on the len in the header. 
+ */ +- if (in_len < sizeof(struct fuse_out_header)) { ++ if (in_len_writeable < sizeof(struct fuse_out_header)) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", + __func__, elem->index); + ret = E2BIG; +@@ -327,7 +333,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); + /* These get updated as we skip */ + struct iovec *in_sg_ptr = in_sg_cpy; +- int in_sg_cpy_count = in_num; ++ int in_sg_cpy_count = in_num - bad_in_num; + + /* skip over parts of in_sg that contained the header iov */ + size_t skip_size = iov_len; +@@ -460,17 +466,21 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + + /* The 'out' part of the elem is from qemu */ + unsigned int out_num = elem->out_num; ++ unsigned int out_num_readable = out_num - req->bad_out_num; + struct iovec *out_sg = elem->out_sg; + size_t out_len = iov_size(out_sg, out_num); ++ size_t out_len_readable = iov_size(out_sg, out_num_readable); + fuse_log(FUSE_LOG_DEBUG, +- "%s: elem %d: with %d out desc of length %zd\n", +- __func__, elem->index, out_num, out_len); ++ "%s: elem %d: with %d out desc of length %zd" ++ " bad_in_num=%u bad_out_num=%u\n", ++ __func__, elem->index, out_num, out_len, req->bad_in_num, ++ req->bad_out_num); + + /* + * The elem should contain a 'fuse_in_header' (in to fuse) + * plus the data based on the len in the header. 
+ */ +- if (out_len < sizeof(struct fuse_in_header)) { ++ if (out_len_readable < sizeof(struct fuse_in_header)) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", + __func__, elem->index); + assert(0); /* TODO */ +@@ -484,63 +494,129 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + copy_from_iov(&fbuf, 1, out_sg); + + pbufv = NULL; /* Compiler thinks an unitialised path */ +- if (out_num > 2 && +- out_sg[0].iov_len == sizeof(struct fuse_in_header) && +- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && +- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { +- /* +- * For a write we don't actually need to copy the +- * data, we can just do it straight out of guest memory +- * but we must still copy the headers in case the guest +- * was nasty and changed them while we were using them. +- */ +- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); +- +- /* copy the fuse_write_in header afte rthe fuse_in_header */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; +- +- /* Allocate the bufv, with space for the rest of the iov */ +- pbufv = malloc(sizeof(struct fuse_bufvec) + +- sizeof(struct fuse_buf) * (out_num - 2)); +- if (!pbufv) { +- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", +- __func__); +- goto out; +- } ++ if (req->bad_in_num || req->bad_out_num) { ++ bool handled_unmappable = false; ++ ++ if (out_num > 2 && out_num_readable >= 2 && !req->bad_in_num && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ handled_unmappable = true; ++ ++ /* copy the fuse_write_in header after fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* 
Allocate the bufv, with space for the rest of the iov */ ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } + +- allocated_bufv = true; +- pbufv->count = 1; +- pbufv->buf[0] = fbuf; ++ allocated_bufv = true; ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = ++ (iovindex < out_num_readable) ? 0 : ++ FUSE_BUF_PHYS_ADDR; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } + +- size_t iovindex, pbufvindex; +- iovindex = 2; /* 2 headers, separate iovs */ +- pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ if (out_num == 2 && out_num_readable == 2 && req->bad_in_num && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_READ && ++ out_sg[1].iov_len == sizeof(struct fuse_read_in)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "Unmappable read case " ++ "in_num=%d bad_in_num=%d\n", ++ elem->in_num, req->bad_in_num); ++ handled_unmappable = true; ++ } + +- for (; iovindex < out_num; iovindex++, pbufvindex++) { +- pbufv->count++; +- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ +- pbufv->buf[pbufvindex].flags = 0; +- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; +- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ if (!handled_unmappable) { ++ fuse_log(FUSE_LOG_ERR, ++ "Unhandled unmappable element: out: %d(b:%d) in: " ++ "%d(b:%d)", ++ out_num, req->bad_out_num, elem->in_num, req->bad_in_num); ++ fv_panic(dev, "Unhandled unmappable element"); + } +- } else { +- /* Normal (non fast write) path */ ++ } ++ ++ if 
(!req->bad_out_num) { ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. ++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", ++ __func__); ++ ++ /* copy the fuse_write_in header after fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } + +- /* Copy the rest of the buffer */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_len; ++ allocated_bufv = true; ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; + +- /* TODO! 
Endianness of header */ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ + +- /* TODO: Add checks for fuse_session_exited */ +- bufv.buf[0] = fbuf; +- bufv.count = 1; +- pbufv = &bufv; ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; ++ ++ /* TODO! Endianness of header */ ++ ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } + } + pbufv->idx = 0; + pbufv->off = 0; +@@ -657,13 +733,16 @@ static void *fv_queue_thread(void *opaque) + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + + while (1) { ++ unsigned int bad_in_num = 0, bad_out_num = 0; + FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest), +- NULL, NULL); ++ &bad_in_num, &bad_out_num); + if (!req) { + break; + } + + req->reply_sent = false; ++ req->bad_in_num = bad_in_num; ++ req->bad_out_num = bad_out_num; + + if (!se->thread_pool_size) { + req_list = g_list_prepend(req_list, req); +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0020-DAX-unmap-virtiofsd-Route-unmappable-reads.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0020-DAX-unmap-virtiofsd-Route-unmappable-reads.patch new file mode 100644 index 0000000000..40b429488e --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0020-DAX-unmap-virtiofsd-Route-unmappable-reads.patch @@ -0,0 +1,56 @@ +From 
1f6a9f8567bdf2be00d217abac33a71248541a4a Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 13:26:51 +0100 +Subject: [PATCH 20/29] DAX/unmap virtiofsd: Route unmappable reads + +When a read with unmappable buffers is found, map it to a slave +read command. + +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/fuse_virtio.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 31f17ab043..1f4c7fff35 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -397,6 +397,35 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + in_sg_left -= ret; + len -= ret; + } while (in_sg_left); ++ ++ if (bad_in_num) { ++ while (len && bad_in_num) { ++ VhostUserFSSlaveMsg msg = { 0 }; ++ msg.flags[0] = VHOST_USER_FS_FLAG_MAP_R; ++ msg.fd_offset[0] = buf->buf[0].pos; ++ msg.c_offset[0] = (uint64_t)(uintptr_t)in_sg_ptr[0].iov_base; ++ msg.len[0] = in_sg_ptr[0].iov_len; ++ if (len < msg.len[0]) { ++ msg.len[0] = len; ++ } ++ bool req_res = !fuse_virtio_io(se, &msg, buf->buf[0].fd); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: bad loop; len=%zd bad_in_num=%d fd_offset=%zd " ++ "c_offset=%p req_res=%d\n", ++ __func__, len, bad_in_num, buf->buf[0].pos, ++ in_sg_ptr[0].iov_base, req_res); ++ if (req_res) { ++ len -= msg.len[0]; ++ buf->buf[0].pos += msg.len[0]; ++ in_sg_ptr++; ++ bad_in_num--; ++ } else { ++ ret = EIO; ++ free(in_sg_cpy); ++ goto err; ++ } ++ } ++ } + free(in_sg_cpy); + + /* Need to fix out->len on EOF */ +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0021-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0021-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch new file mode 100644 index 0000000000..f8a73044b9 --- /dev/null +++ 
b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0021-DAX-unmap-virtiofsd-route-unmappable-write-to-slave-.patch @@ -0,0 +1,121 @@ +From e291b7766f49b06933afed374b6476416d951517 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 13:18:42 +0100 +Subject: [PATCH 21/29] DAX/unmap virtiofsd: route unmappable write to slave + command + +When a fuse_buf_copy is performed on an element with FUSE_BUF_PHYS_ADDR +route it to a fuse_virtio_write request that does a slave command to +perform the write. + +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/buffer.c | 14 +++++++++++--- + tools/virtiofsd/fuse_common.h | 6 +++++- + tools/virtiofsd/fuse_lowlevel.h | 3 --- + tools/virtiofsd/passthrough_ll.c | 2 +- + 4 files changed, 17 insertions(+), 8 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 1a050aa441..8135d52d2a 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -200,13 +200,20 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + return copied; + } + +-static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, ++static ssize_t fuse_buf_copy_one(fuse_req_t req, ++ const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len) + { + int src_is_fd = src->flags & FUSE_BUF_IS_FD; + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; ++ int src_is_phys = src->flags & FUSE_BUF_PHYS_ADDR; ++ int dst_is_phys = src->flags & FUSE_BUF_PHYS_ADDR; + ++ if (src_is_phys && !src_is_fd && dst_is_fd) { ++ return fuse_virtio_write(req, dst, dst_off, src, src_off, len); ++ } ++ assert(!src_is_phys && !dst_is_phys); + if (!src_is_fd && !dst_is_fd) { + char *dstmem = (char *)dst->mem + dst_off; + char *srcmem = (char *)src->mem + src_off; +@@ -259,7 +266,8 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + return 1; + } + +-ssize_t fuse_buf_copy(struct 
fuse_bufvec *dstv, struct fuse_bufvec *srcv) ++ssize_t fuse_buf_copy(fuse_req_t req, struct fuse_bufvec *dstv, ++ struct fuse_bufvec *srcv) + { + size_t copied = 0, i; + +@@ -301,7 +309,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + dst_len = dst->size - dstv->off; + len = min_size(src_len, dst_len); + +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); ++ res = fuse_buf_copy_one(req, dst, dstv->off, src, srcv->off, len); + if (res < 0) { + if (!copied) { + return res; +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index ed9280de91..05d56883dd 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -495,6 +495,8 @@ struct fuse_conn_info { + struct fuse_session; + struct fuse_pollhandle; + struct fuse_conn_info_opts; ++struct fuse_req; ++typedef struct fuse_req *fuse_req_t; + + /** + * This function parses several command-line options that can be used +@@ -713,11 +715,13 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + /** + * Copy data from one buffer vector to another + * ++ * @param req The request this copy is part of + * @param dst destination buffer vector + * @param src source buffer vector + * @return actual number of bytes copied or -errno on error + */ +-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); ++ssize_t fuse_buf_copy(fuse_req_t req, ++ struct fuse_bufvec *dst, struct fuse_bufvec *src); + + /** + * Memory buffer iterator +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 866d122352..e543f64177 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -42,9 +42,6 @@ + /** Inode number type */ + typedef uint64_t fuse_ino_t; + +-/** Request pointer type */ +-typedef struct fuse_req *fuse_req_t; +- + /** + * Session + * +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 56a4b9404a..ab33fabcda 100644 +--- 
a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2063,7 +2063,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + } + } + +- res = fuse_buf_copy(&out_buf, in_buf); ++ res = fuse_buf_copy(req, &out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0022-DAX-vhost-user-Rework-slave-return-values.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0022-DAX-vhost-user-Rework-slave-return-values.patch new file mode 100644 index 0000000000..b88bcb4a60 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0022-DAX-vhost-user-Rework-slave-return-values.patch @@ -0,0 +1,350 @@ +From 2a64df420827ff0b127a30f2ac877a7b1ded925b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 18:08:41 +0100 +Subject: [PATCH 22/29] DAX: vhost-user: Rework slave return values + +All the current slave handlers on the qemu side generate an 'int' +return value that's squashed down to a bool (!!ret) and stuffed into +a uint64_t (field of a union) to be returned. + +Move the uint64_t type back up through the individual handlers so +that we can make one actually return a full uint64_t. + +Note that the definition in the interop spec says most of these +cases are defined as returning 0 on success and non-0 for failure, +so it's OK to change from a bool to another non-0. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/virtio/vhost-backend.c | 4 +-- + hw/virtio/vhost-user-fs.c | 42 ++++++++++++++++--------------- + hw/virtio/vhost-user.c | 32 ++++++++++++----------- + include/hw/virtio/vhost-backend.h | 2 +- + include/hw/virtio/vhost-user-fs.h | 13 ++++++---- + 5 files changed, 50 insertions(+), 43 deletions(-) + +diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c +index 222bbcc62d..e81083ddda 100644 +--- a/hw/virtio/vhost-backend.c ++++ b/hw/virtio/vhost-backend.c +@@ -401,7 +401,7 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + return -ENODEV; + } + +-int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, ++uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg) + { + int ret = 0; +@@ -424,5 +424,5 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + break; + } + +- return ret; ++ return !!ret; + } +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index 82a32492a7..c02dcaeca7 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -35,19 +35,19 @@ + #define DAX_WINDOW_PROT PROT_NONE + #endif + +-int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, +- int fd) ++uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, ++ int fd) + { + VHostUserFS *fs = VHOST_USER_FS(dev->vdev); + if (!fs) { + /* Shouldn't happen - but seen on error path */ + fprintf(stderr, "%s: Bad fs ptr\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + size_t cache_size = fs->conf.cache_size; + if (!cache_size) { + fprintf(stderr, "%s: map when DAX cache not present\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + void *cache_host = memory_region_get_ram_ptr(&fs->cache); + +@@ -56,7 +56,7 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + + if (fd < 0) { + fprintf(stderr, "%s: Bad fd for map\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + + for (i = 
0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) { +@@ -78,11 +78,11 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W) ? PROT_WRITE : 0), + MAP_SHARED | MAP_FIXED, + fd, sm->fd_offset[i]) != (cache_host + sm->c_offset[i])) { ++ res = -errno; + fprintf(stderr, "%s: map failed err %d [%d] %" + PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, + errno, i, sm->c_offset[i], sm->len[i], + sm->fd_offset[i]); +- res = -1; + break; + } + } +@@ -91,10 +91,11 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + /* Something went wrong, unmap them all */ + vhost_user_fs_slave_unmap(dev, sm); + } +- return res; ++ return (uint64_t)res; + } + +-int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) ++uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, ++ VhostUserFSSlaveMsg *sm) + { + VHostUserFS *fs = VHOST_USER_FS(dev->vdev); + if (!fs) { +@@ -114,7 +115,7 @@ int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) + } + + fprintf(stderr, "%s: unmap when DAX cache not present\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + void *cache_host = memory_region_get_ram_ptr(&fs->cache); + +@@ -148,26 +149,27 @@ int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) + ptr = mmap(cache_host + sm->c_offset[i], sm->len[i], DAX_WINDOW_PROT, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (ptr != (cache_host + sm->c_offset[i])) { ++ res = -errno; + fprintf(stderr, "%s: mmap failed (%s) [%d] %" + PRIx64 "+%" PRIx64 " from %" PRIx64 " res: %p\n", + __func__, + strerror(errno), + i, sm->c_offset[i], sm->len[i], + sm->fd_offset[i], ptr); +- res = -1; + } + } + +- return res; ++ return (uint64_t)res; + } + +-int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) ++uint64_t vhost_user_fs_slave_sync(struct vhost_dev *dev, ++ VhostUserFSSlaveMsg *sm) + { + VHostUserFS *fs = 
VHOST_USER_FS(dev->vdev); + size_t cache_size = fs->conf.cache_size; + if (!cache_size) { + fprintf(stderr, "%s: sync when DAX cache not present\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + void *cache_host = memory_region_get_ram_ptr(&fs->cache); + +@@ -191,26 +193,26 @@ int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm) + + if (msync(cache_host + sm->c_offset[i], sm->len[i], + MS_SYNC /* ?? */)) { ++ res = -errno; + fprintf(stderr, "%s: msync failed (%s) [%d] %" + PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__, + strerror(errno), + i, sm->c_offset[i], sm->len[i], + sm->fd_offset[i]); +- res = -1; + } + } + +- return res; ++ return (uint64_t)res; + } + +-int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, +- int fd) ++uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, ++ int fd) + { + VHostUserFS *fs = VHOST_USER_FS(dev->vdev); + if (!fs) { + /* Shouldn't happen - but seen it in error paths */ + fprintf(stderr, "%s: Bad fs ptr\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + + unsigned int i; +@@ -219,7 +221,7 @@ int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + + if (fd < 0) { + fprintf(stderr, "%s: Bad fd for map\n", __func__); +- return -1; ++ return (uint64_t)-1; + } + + for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES && !res; i++) { +@@ -285,7 +287,7 @@ int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + * TODO! We should be returning 'done' if possible but our error handling + * doesn't know about that yet. 
+ */ +- return res; ++ return (uint64_t)res; + } + + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index b4ef0102ad..d95dbc39e3 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1325,24 +1325,25 @@ static int vhost_user_reset_device(struct vhost_dev *dev) + return 0; + } + +-static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) ++static uint64_t vhost_user_slave_handle_config_change(struct vhost_dev *dev) + { + int ret = -1; + + if (!dev->config_ops) { +- return -1; ++ return true; + } + + if (dev->config_ops->vhost_dev_config_notifier) { + ret = dev->config_ops->vhost_dev_config_notifier(dev); + } + +- return ret; ++ return !!ret; + } + +-static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, +- VhostUserVringArea *area, +- int fd) ++static uint64_t vhost_user_slave_handle_vring_host_notifier( ++ struct vhost_dev *dev, ++ VhostUserVringArea *area, ++ int fd) + { + int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; + size_t page_size = qemu_real_host_page_size; +@@ -1356,7 +1357,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || + vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { +- return -1; ++ return true; + } + + n = &user->notifier[queue_idx]; +@@ -1369,18 +1370,18 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, + } + + if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { +- return 0; ++ return false; + } + + /* Sanity check. 
*/ + if (area->size != page_size) { +- return -1; ++ return true; + } + + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, area->offset); + if (addr == MAP_FAILED) { +- return -1; ++ return true; + } + + name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", +@@ -1391,13 +1392,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, + + if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { + munmap(addr, page_size); +- return -1; ++ return true; + } + + n->addr = addr; + n->set = true; + +- return 0; ++ return false; + } + + static void slave_read(void *opaque) +@@ -1406,7 +1407,8 @@ static void slave_read(void *opaque) + struct vhost_user *u = dev->opaque; + VhostUserHeader hdr = { 0, }; + VhostUserPayload payload = { 0, }; +- int size, ret = 0; ++ int size; ++ uint64_t ret = 0; + struct iovec iov; + struct msghdr msgh; + int fd[VHOST_USER_SLAVE_MAX_FDS]; +@@ -1494,7 +1496,7 @@ static void slave_read(void *opaque) + #endif + default: + error_report("Received unexpected msg type: %d.", hdr.request); +- ret = -EINVAL; ++ ret = (uint64_t)-EINVAL; + } + + /* Close the remaining file descriptors. 
*/ +@@ -1515,7 +1517,7 @@ static void slave_read(void *opaque) + hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr.flags |= VHOST_USER_REPLY_MASK; + +- payload.u64 = !!ret; ++ payload.u64 = ret; + hdr.size = sizeof(payload.u64); + + iovec[0].iov_base = &hdr; +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index 8a6f8e2a7a..64ac6b6444 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -186,7 +186,7 @@ int vhost_backend_update_device_iotlb(struct vhost_dev *dev, + int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len); + +-int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, ++uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg); + + int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 0750687463..845cdb0177 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -64,10 +64,13 @@ struct VHostUserFS { + }; + + /* Callbacks from the vhost-user code for slave commands */ +-int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, +- int fd); +-int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); +-int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm); +-int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, int fd); ++uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, ++ int fd); ++uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, ++ VhostUserFSSlaveMsg *sm); ++uint64_t vhost_user_fs_slave_sync(struct vhost_dev *dev, ++ VhostUserFSSlaveMsg *sm); ++uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, ++ VhostUserFSSlaveMsg *sm, int fd); + + #endif /* _QEMU_VHOST_USER_FS_H */ +-- +2.25.1 + diff --git 
a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0023-DAX-libvhost-user-Route-slave-message-payload.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0023-DAX-libvhost-user-Route-slave-message-payload.patch new file mode 100644 index 0000000000..59d655e000 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0023-DAX-libvhost-user-Route-slave-message-payload.patch @@ -0,0 +1,97 @@ +From 55b6372e1b893e77c6c4d5e87bd1a0765126399c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 20 May 2019 20:02:29 +0100 +Subject: [PATCH 23/29] DAX: libvhost-user: Route slave message payload + +Route the uint64 payload from message replies on the slave back up +through vu_process_message_reply and to the callers. + +Signed-off-by: Dr. David Alan Gilbert +--- + subprojects/libvhost-user/libvhost-user.c | 20 ++++++++++++++++---- + tools/virtiofsd/fuse_virtio.c | 2 ++ + 2 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c +index 9b8223b5d5..a1cbb626d2 100644 +--- a/subprojects/libvhost-user/libvhost-user.c ++++ b/subprojects/libvhost-user/libvhost-user.c +@@ -403,9 +403,11 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) + * Processes a reply on the slave channel. + * Entered with slave_mutex held and releases it before exit. + * Returns true on success. 
++ * *payload is written on success + */ + static bool +-vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) ++vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg, ++ uint64_t *payload) + { + VhostUserMsg msg_reply; + bool result = false; +@@ -425,7 +427,8 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) + goto out; + } + +- result = msg_reply.payload.u64 == 0; ++ *payload = msg_reply.payload.u64; ++ result = true; + + out: + pthread_mutex_unlock(&dev->slave_mutex); +@@ -1312,6 +1315,8 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + { + int qidx = vq - dev->vq; + int fd_num = 0; ++ bool res; ++ uint64_t payload = 0; + VhostUserMsg vmsg = { + .request = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, +@@ -1342,7 +1347,10 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + } + + /* Also unlocks the slave_mutex */ +- return vu_process_message_reply(dev, &vmsg); ++ res = vu_process_message_reply(dev, &vmsg, &payload); ++ res = res && (payload == 0); ++ ++ return res; + } + + static bool +@@ -2915,6 +2923,8 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, + VhostUserFSSlaveMsg *fsm) + { + int fd_num = 0; ++ bool res; ++ uint64_t payload = 0; + VhostUserMsg vmsg = { + .request = req, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, +@@ -2939,6 +2949,8 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, + } + + /* Also unlocks the slave_mutex */ +- return vu_process_message_reply(dev, &vmsg); ++ res = vu_process_message_reply(dev, &vmsg, &payload); ++ res = res && (payload == 0); ++ return res; + } + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 1f4c7fff35..416d285844 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -419,6 +419,8 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + 
buf->buf[0].pos += msg.len[0]; + in_sg_ptr++; + bad_in_num--; ++ } else if (req_res == 0) { ++ break; + } else { + ret = EIO; + free(in_sg_cpy); +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0024-DAX-virtiofsd-Rework-fs-cache-request-error-path.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0024-DAX-virtiofsd-Rework-fs-cache-request-error-path.patch new file mode 100644 index 0000000000..d8f94aeb03 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0024-DAX-virtiofsd-Rework-fs-cache-request-error-path.patch @@ -0,0 +1,240 @@ +From 5e0e90706b03fa71072b6b17779e0a66cb14aa64 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 21 May 2019 15:10:05 +0100 +Subject: [PATCH 24/29] DAX: virtiofsd: Rework fs-cache-request error path + +Rework error values all the way back to the guest for IO requests. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/virtio/vhost-user-fs.c | 9 +++-- + subprojects/libvhost-user/libvhost-user.c | 18 ++++++---- + subprojects/libvhost-user/libvhost-user.h | 6 ++-- + tools/virtiofsd/fuse_lowlevel.h | 11 ++++--- + tools/virtiofsd/fuse_virtio.c | 40 +++++++++++------------ + 5 files changed, 45 insertions(+), 39 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index c02dcaeca7..b43725824f 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -283,11 +283,10 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, + close(fd); + + trace_vhost_user_fs_slave_io_exit(res, done); +- /* +- * TODO! We should be returning 'done' if possible but our error handling +- * doesn't know about that yet. 
+- */ +- return (uint64_t)res; ++ if (res < 0) { ++ return (uint64_t)res; ++ } ++ return (uint64_t)done; + } + + static void vuf_get_config(VirtIODevice *vdev, uint8_t *config) +diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c +index a1cbb626d2..4cf4aef63d 100644 +--- a/subprojects/libvhost-user/libvhost-user.c ++++ b/subprojects/libvhost-user/libvhost-user.c +@@ -2919,8 +2919,8 @@ vu_queue_push(VuDev *dev, VuVirtq *vq, + vu_queue_inflight_post_put(dev, vq, elem->index); + } + +-bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, +- VhostUserFSSlaveMsg *fsm) ++int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, ++ VhostUserFSSlaveMsg *fsm) + { + int fd_num = 0; + bool res; +@@ -2939,18 +2939,24 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, + vmsg.fd_num = fd_num; + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { +- return false; ++ return -EINVAL; + } + + pthread_mutex_lock(&dev->slave_mutex); + if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { + pthread_mutex_unlock(&dev->slave_mutex); +- return false; ++ return -EIO; + } + + /* Also unlocks the slave_mutex */ + res = vu_process_message_reply(dev, &vmsg, &payload); +- res = res && (payload == 0); +- return res; ++ if (!res) { ++ return -EIO; ++ } ++ /* ++ * Payload is delivered as uint64_t but is actually signed for ++ * errors. 
++ */ ++ return (int64_t)payload; + } + +diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h +index 4b6e681a3e..ee75d4931f 100644 +--- a/subprojects/libvhost-user/libvhost-user.h ++++ b/subprojects/libvhost-user/libvhost-user.h +@@ -723,9 +723,9 @@ bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, + * @fd: an fd (only required for map, else must be -1) + * @fsm: The body of the message + * +- * Returns: true if the reply was 0 ++ * Returns: 0 or above for success, negative errno on error + */ +-bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, +- VhostUserFSSlaveMsg *fsm); ++int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd, ++ VhostUserFSSlaveMsg *fsm); + + #endif /* LIBVHOST_USER_H */ +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index e543f64177..a36a893871 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1998,7 +1998,7 @@ int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + * @param fd The fd to map + * @return Zero on success + */ +-int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); ++int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); + + /** + * For use with virtio-fs; request unmapping of part of the cache +@@ -2007,7 +2007,7 @@ int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd); + * @param msg A set of unmapping requests + * @return Zero on success + */ +-int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); ++int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); + + /** + * For use with virtio-fs; request synchronisation of part of the cache +@@ -2017,7 +2017,7 @@ int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg); + * @param msg A set of syncing requests + * @return Zero on success + */ +-int 
fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); ++int64_t fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); + + /** + * For use with virtio-fs; request IO directly to memory +@@ -2025,9 +2025,10 @@ int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg); + * @param se The current session + * @param msg A set of IO requests + * @param fd The fd to map +- * @return Zero on success ++ * @return Length on success, negative errno on error + */ +-int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd); ++int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, ++ int fd); + + /** + * For use with virtio-fs; wrapper for fuse_virtio_io for writes +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 416d285844..9577eaa68d 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -408,13 +408,13 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + if (len < msg.len[0]) { + msg.len[0] = len; + } +- bool req_res = !fuse_virtio_io(se, &msg, buf->buf[0].fd); ++ int64_t req_res = fuse_virtio_io(se, &msg, buf->buf[0].fd); + fuse_log(FUSE_LOG_DEBUG, + "%s: bad loop; len=%zd bad_in_num=%d fd_offset=%zd " +- "c_offset=%p req_res=%d\n", ++ "c_offset=%p req_res=%ld\n", + __func__, len, bad_in_num, buf->buf[0].pos, + in_sg_ptr[0].iov_base, req_res); +- if (req_res) { ++ if (req_res > 0) { + len -= msg.len[0]; + buf->buf[0].pos += msg.len[0]; + in_sg_ptr++; +@@ -422,7 +422,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + } else if (req_res == 0) { + break; + } else { +- ret = EIO; ++ ret = req_res; + free(in_sg_cpy); + goto err; + } +@@ -1155,40 +1155,41 @@ void virtio_session_close(struct fuse_session *se) + se->virtio_dev = NULL; + } + +-int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd) ++int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd) + { + if 
(!req->se->virtio_dev) { + return -ENODEV; + } +- return !vu_fs_cache_request(&req->se->virtio_dev->dev, +- VHOST_USER_SLAVE_FS_MAP, fd, msg); ++ return vu_fs_cache_request(&req->se->virtio_dev->dev, ++ VHOST_USER_SLAVE_FS_MAP, fd, msg); + } + +-int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) ++int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg) + { + if (!se->virtio_dev) { + return -ENODEV; + } +- return !vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, +- -1, msg); ++ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP, ++ -1, msg); + } + +-int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) ++int64_t fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg) + { + if (!req->se->virtio_dev) { + return -ENODEV; + } +- return !vu_fs_cache_request(&req->se->virtio_dev->dev, +- VHOST_USER_SLAVE_FS_SYNC, -1, msg); ++ return vu_fs_cache_request(&req->se->virtio_dev->dev, ++ VHOST_USER_SLAVE_FS_SYNC, -1, msg); + } + +-int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd) ++int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, ++ int fd) + { + if (!se->virtio_dev) { + return -ENODEV; + } +- return !vu_fs_cache_request(&se->virtio_dev->dev, +- VHOST_USER_SLAVE_FS_IO, fd, msg); ++ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_IO, fd, ++ msg); + } + + /* +@@ -1214,8 +1215,7 @@ ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst, + msg.len[0] = len; + msg.flags[0] = VHOST_USER_FS_FLAG_MAP_W; + +- bool result = !fuse_virtio_io(req->se, &msg, dst->fd); +- /* TODO: Rework the result path to actually get length/error */ +- fuse_log(FUSE_LOG_DEBUG, "%s: result=%d\n", __func__, result); +- return result ? 
len : -EIO; ++ int64_t result = fuse_virtio_io(req->se, &msg, dst->fd); ++ fuse_log(FUSE_LOG_DEBUG, "%s: result=%ld\n", __func__, result); ++ return result; + } +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0025-DAX-virtiofsd-make-FUSE_REMOVEMAPPING-support-multip.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0025-DAX-virtiofsd-make-FUSE_REMOVEMAPPING-support-multip.patch new file mode 100644 index 0000000000..db0596b2d6 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0025-DAX-virtiofsd-make-FUSE_REMOVEMAPPING-support-multip.patch @@ -0,0 +1,76 @@ +From 0946e9a802943443333eb7e8c6a0989f37c236a5 Mon Sep 17 00:00:00 2001 +From: Peng Tao +Date: Mon, 3 Jun 2019 10:47:19 +0800 +Subject: [PATCH 25/29] DAX: virtiofsd: make FUSE_REMOVEMAPPING support + multiple entries + +The fuse wire protocol is changed so that we can unmap multiple +mappings in a single call. 
+ +Signed-off-by: Peng Tao +fix by: Catherine Ho +--- + tools/virtiofsd/fuse_lowlevel.c | 5 +++-- + tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++-------- + 2 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index a2480d4aa1..99ba000c2e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1920,12 +1920,13 @@ static void do_removemapping(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_removemapping_one *one; + + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); +- if (!arg) { ++ if (!arg || arg->count <= 0) { ++ fuse_log(FUSE_LOG_ERR, "do_removemapping: invalid arg %p\n", arg); + fuse_reply_err(req, EINVAL); + return; + } + +- one = fuse_mbuf_iter_advance(iter, sizeof(*one)); ++ one = fuse_mbuf_iter_advance(iter, arg->count * sizeof(*one)); + if (!one) { + fuse_log( + FUSE_LOG_ERR, +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ab33fabcda..3af55ffb8a 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2965,14 +2965,24 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se, + VhostUserFSSlaveMsg msg = { 0 }; + int ret = 0; + +- msg.len[0] = argp->len; +- msg.c_offset[0] = argp->moffset; +- if (fuse_virtio_unmap(se, &msg)) { +- fprintf(stderr, +- "%s: unmap over virtio failed " +- "(offset=0x%lx, len=0x%lx)\n", +- __func__, argp->moffset, argp->len); +- ret = EINVAL; ++ for (int i = 0; num > 0; i++, argp++) { ++ msg.len[i] = argp->len; ++ msg.c_offset[i] = argp->moffset; ++ ++ if (--num == 0 || i == VHOST_USER_FS_SLAVE_ENTRIES - 1) { ++ if (fuse_virtio_unmap(se, &msg)) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: unmap over virtio failed " ++ "(offset=0x%lx, len=0x%lx)\n", ++ __func__, argp->moffset, argp->len); ++ ret = EINVAL; ++ break; ++ } ++ if (num > 0) { ++ i = 0; ++ memset(&msg, 0, sizeof(msg)); ++ } ++ } + } + + fuse_reply_err(req, ret); +-- 
+2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0026-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0026-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch new file mode 100644 index 0000000000..e39c64c83d --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0026-DAX-virtiofsd-implement-FUSE_INIT-map_alignment-fiel.patch @@ -0,0 +1,42 @@ +From e684fffcaf21baf0f4341091303ce3c2dcbf822d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 26 Jul 2019 09:33:22 +0100 +Subject: [PATCH 26/29] DAX:virtiofsd: implement FUSE_INIT map_alignment field + +Communicate the host page size to the FUSE client so that +FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING requests are aware of our alignment +constraints. + +Signed-off-by: Stefan Hajnoczi +--- + tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 99ba000c2e..d6256f571b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2188,6 +2188,12 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + outarg.max_background = se->conn.max_background; + outarg.congestion_threshold = se->conn.congestion_threshold; + outarg.time_gran = se->conn.time_gran; ++ if (arg->flags & FUSE_MAP_ALIGNMENT) { ++ outarg.flags |= FUSE_MAP_ALIGNMENT; ++ ++ /* This constraint comes from mmap(2) and munmap(2) */ ++ outarg.map_alignment = ffsl(sysconf(_SC_PAGE_SIZE)) - 1; ++ } + + fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); + fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +@@ -2197,6 +2203,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", + outarg.congestion_threshold); + fuse_log(FUSE_LOG_DEBUG, 
" time_gran=%u\n", outarg.time_gran); ++ fuse_log(FUSE_LOG_DEBUG, " map_alignment=%u\n", outarg.map_alignment); + + send_reply_ok(req, &outarg, outargsize); + } +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0027-virtiofsd-add-initial-support-for-shared-versions.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0027-virtiofsd-add-initial-support-for-shared-versions.patch new file mode 100644 index 0000000000..9505b723b5 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0027-virtiofsd-add-initial-support-for-shared-versions.patch @@ -0,0 +1,776 @@ +From a0cbb60bb58ffaf2ae771c7822f0cb25762076fa Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 20 Nov 2019 14:27:19 +0000 +Subject: [PATCH 27/29] virtiofsd: add initial support for shared versions + +Not backward compatible with previous kernels, so please only use with +kernel that has version table support (this will need to be cleaned up). + +No READDIRPLUS support in the kernel for versioned entries, so disable for +now. + +Attribute timeout is set to "infinity", so changes to underlying filesystem +won't be visible. This also needs to be fixed, but is best for testing the +versioning since the shared version is the only thing that will force +refreshing metadata and dcache lookups. + +No caching metadata modifications yet. + +Start "ireg" daemon before starting any fuse servers. + +Signed-off-by: Miklos Szeredi +Fix by: +Signed-off-by: Liu Bo +Only send entryver_out when shared is enabled by: +With help message update from: +Signed-off-by: Xiao Yang + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +--- + include/standard-headers/linux/fuse.h | 5 + + tools/virtiofsd/fuse_lowlevel.c | 36 ++- + tools/virtiofsd/fuse_lowlevel.h | 9 +- + tools/virtiofsd/helper.c | 4 + + tools/virtiofsd/ireg.h | 33 +++ + tools/virtiofsd/passthrough_ll.c | 321 +++++++++++++++++++++++++- + 6 files changed, 387 insertions(+), 21 deletions(-) + create mode 100644 tools/virtiofsd/ireg.h + +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 82c0a38b59..fbced7caef 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -510,6 +510,11 @@ struct fuse_entry_out { + struct fuse_attr attr; + }; + ++struct fuse_entryver_out { ++ uint64_t version_index; ++ int64_t initial_version; ++}; ++ + struct fuse_forget_in { + uint64_t nlookup; + }; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index d6256f571b..47231378db 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -389,28 +389,46 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) + } + } + +-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e, ++ bool shared) + { +- struct fuse_entry_out arg; +- size_t size = sizeof(arg); ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_entryver_out)]; ++ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; ++ struct fuse_entryver_out *ever = ++ (struct fuse_entryver_out *)(buf + sizeof(struct fuse_entry_out)); ++ size_t size = sizeof(buf); + +- memset(&arg, 0, sizeof(arg)); +- fill_entry(&arg, e); +- return send_reply_ok(req, &arg, size); ++ if ((req->se->conn.proto_minor >= 9) && !shared) { ++ size -= sizeof(struct fuse_entryver_out); ++ } ++ ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ ever->initial_version = e->initial_version; ++ 
ever->version_index = e->version_offset; ++ return send_reply_ok(req, buf, size); + } + + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *f) ++ const struct fuse_file_info *f, bool shared) + { +- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out) + ++ sizeof(struct fuse_entryver_out)]; + size_t entrysize = sizeof(struct fuse_entry_out); + struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; + struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); ++ struct fuse_entryver_out *ever = ++ (struct fuse_entryver_out *)(buf + entrysize + ++ sizeof(struct fuse_open_out)); + + memset(buf, 0, sizeof(buf)); + fill_entry(earg, e); + fill_open(oarg, f); +- return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); ++ ever->initial_version = e->initial_version; ++ ever->version_index = e->version_offset; ++ return send_reply_ok(req, buf, ++ entrysize + sizeof(struct fuse_open_out) + ++ (shared ? sizeof(struct fuse_entryver_out) : 0)); + } + + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index a36a893871..5f60e3fd2c 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -26,6 +26,7 @@ + #include "fuse_common.h" + #include "standard-headers/linux/fuse.h" + ++#include + #include + #include + #include +@@ -104,6 +105,9 @@ struct fuse_entry_param { + * Flags for fuse_attr.flags that do not fit into attr. 
+ */ + uint32_t attr_flags; ++ ++ uint64_t version_offset; ++ int64_t initial_version; + }; + + /** +@@ -1294,7 +1298,8 @@ void fuse_reply_none(fuse_req_t req); + * @param e the entry parameters + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e, ++ bool shared); + + /** + * Reply with a directory entry and open parameters +@@ -1314,7 +1319,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *fi); ++ const struct fuse_file_info *fi, bool shared); + + /** + * Reply with attributes +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 28243b51b2..29331ec2fc 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -174,6 +174,10 @@ void fuse_cmdline_help(void) + " default: no_xattr\n" + " -o modcaps=CAPLIST Modify the list of capabilities\n" + " e.g. 
-o modcaps=+sys_admin:-chown\n" ++ " -o shared|no_shared enable/disable shared cache\n" ++ " default: no_shared\n" ++ " please start 'ireg' daemon before " ++ " using shared cache\n" + " --rlimit-nofile= set maximum number of file descriptors\n" + " (0 leaves rlimit unchanged)\n" + " default: min(1000000, fs.file-max - 16384)\n" +diff --git a/tools/virtiofsd/ireg.h b/tools/virtiofsd/ireg.h +new file mode 100644 +index 0000000000..91c0f386d7 +--- /dev/null ++++ b/tools/virtiofsd/ireg.h +@@ -0,0 +1,33 @@ ++#define VERSION_TABLE_MAGIC 0x7265566465726853 ++ ++enum ireg_op { ++ IREG_GET, ++ IREG_PUT, ++}; ++ ++struct ireg_msg { ++ enum ireg_op op; ++ uint64_t handle; ++ union { ++ struct { ++ uint64_t ino; ++ uint64_t dev; ++ } get; ++ struct { ++ uint64_t refid; ++ } put; ++ }; ++}; ++ ++enum srv_op { ++ SRV_VERSION, ++}; ++ ++struct srv_msg { ++ enum srv_op op; ++ uint64_t handle; ++ struct { ++ uint64_t refid; ++ uint64_t offset; ++ } version; ++}; +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 3af55ffb8a..52a52b2dd7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -44,16 +44,21 @@ + #include + #include + #include ++#include + #include ++#include + #include + #include + #include ++#include + #include ++#include + #include + #include + #include + + #include "qemu/cutils.h" ++#include "ireg.h" + #include "passthrough_helpers.h" + #include "passthrough_seccomp.h" + +@@ -110,6 +115,8 @@ struct lo_inode { + */ + uint64_t nlookup; + ++ uint64_t version_offset; ++ uint64_t ireg_refid; + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ +@@ -152,12 +159,16 @@ struct lo_data { + char *modcaps; + double timeout; + int cache; ++ int shared; + int timeout_set; + int readdirplus_set; + int readdirplus_clear; + int allow_direct_io; + int announce_submounts; + bool use_statx; ++ int ireg_sock; ++ int64_t *version_table; ++ 
uint64_t version_table_size; + struct lo_inode root; + GHashTable *inodes; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ +@@ -193,6 +204,8 @@ static const struct fuse_opt lo_opts[] = { + { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE }, + { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ { "shared", offsetof(struct lo_data, shared), 1 }, ++ { "no_shared", offsetof(struct lo_data, shared), 0 }, + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, + { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, + { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, +@@ -204,6 +217,7 @@ static bool use_syslog = false; + static int current_log_level; + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + uint64_t n); ++static void put_shared(struct lo_data *lo, struct lo_inode *inode); + + static struct { + pthread_mutex_t mutex; +@@ -512,6 +526,7 @@ static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) + + if (g_atomic_int_dec_and_test(&inode->refcount)) { + close(inode->fd); ++ put_shared(lo, inode); + free(inode); + } + } +@@ -587,8 +602,9 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + } + } + ++ /* TODO: shared version support for readdirplus */ + if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || +- lo->readdirplus_clear) { ++ lo->readdirplus_clear || lo->shared) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; + } +@@ -600,6 +616,29 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + } + } + ++static int64_t *version_ptr(struct lo_data *lo, struct lo_inode *inode) ++{ ++ return lo->version_table + inode->version_offset; ++} ++ ++static int64_t get_version(struct lo_data *lo, struct lo_inode *inode) ++{ ++ if (!inode->version_offset) { ++ return 0; ++ 
} ++ ++ return __atomic_load_8(version_ptr(lo, inode), __ATOMIC_SEQ_CST); ++} ++ ++static void update_version(struct lo_data *lo, struct lo_inode *inode) ++{ ++ if (!inode->version_offset) { ++ return; ++ } ++ ++ __atomic_add_fetch(version_ptr(lo, inode), 1, __ATOMIC_SEQ_CST); ++} ++ + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { +@@ -731,6 +770,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + goto out_err; + } + } ++ update_version(lo, inode); + lo_inode_put(lo, &inode); + + return lo_getattr(req, ino, fi); +@@ -763,6 +803,74 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, + return p; + } + ++struct msgreply { ++ struct lo_inode *inode; ++ sem_t ready; ++}; ++ ++static void get_shared(struct lo_data *lo, struct lo_inode *inode) ++{ ++ int res; ++ struct msgreply rep = { ++ .inode = inode, ++ }; ++ struct ireg_msg msg = { ++ .op = IREG_GET, ++ .handle = (uintptr_t) &rep, ++ .get = { ++ .ino = inode->key.ino, ++ .dev = inode->key.dev, ++ }, ++ }; ++ ++ if (lo->ireg_sock == -1) { ++ inode->version_offset = 0; ++ return; ++ } ++ ++ sem_init(&rep.ready, 0, 0); ++ ++ res = write(lo->ireg_sock, &msg, sizeof(msg)); ++ if (res != sizeof(msg)) { ++ if (res == -1) { ++ fuse_log(FUSE_LOG_WARNING, ++ "write(lo->ireg_sock, {IREG_GET, ...}): %m\n"); ++ } else { ++ fuse_log(FUSE_LOG_WARNING, "short write to ireg_sock: %i\n", res); ++ } ++ return; ++ } ++ ++ while (sem_wait(&rep.ready)) { ++ ; ++ } ++ sem_destroy(&rep.ready); ++} ++ ++static void put_shared(struct lo_data *lo, struct lo_inode *inode) ++{ ++ int res; ++ struct ireg_msg msg = { ++ .op = IREG_PUT, ++ .put.refid = inode->ireg_refid, ++ }; ++ ++ if (lo->ireg_sock == -1) { ++ return; ++ } ++ ++ res = write(lo->ireg_sock, &msg, sizeof(msg)); ++ if (res != sizeof(msg)) { ++ if (res == -1) { ++ fuse_log(FUSE_LOG_WARNING, ++ "write(lo->ireg_sock, {IREG_PUT, ...}): %m\n"); ++ } else { ++ fuse_log(FUSE_LOG_WARNING, "short write 
to ireg_sock: %i\n", res); ++ } ++ return; ++ } ++} ++ + /* value_destroy_func for posix_locks GHashTable */ + static void posix_locks_value_destroy(gpointer data) + { +@@ -908,16 +1016,30 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + } + pthread_mutex_lock(&lo->mutex); ++ get_shared(lo, inode); + inode->fuse_ino = lo_add_inode_mapping(req, inode); + g_hash_table_insert(lo->inodes, &inode->key, inode); + pthread_mutex_unlock(&lo->mutex); + } ++ ++ e->initial_version = get_version(lo, inode); ++ res = fstatat(inode->fd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ saverr = errno; ++ unref_inode_lolocked(lo, inode, 1); ++ errno = saverr; ++ goto out_err; ++ } ++ + e->ino = inode->fuse_ino; ++ e->version_offset = inode->version_offset; + lo_inode_put(lo, &inode); + lo_inode_put(lo, &dir); + +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, +- name, (unsigned long long)e->ino); ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli (version_table[%lli]=%lli)\n", ++ (unsigned long long)parent, name, (unsigned long long)e->ino, ++ (unsigned long long)e->version_offset, ++ (unsigned long long)e->initial_version); + + return 0; + +@@ -952,7 +1074,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + if (err) { + fuse_reply_err(req, err); + } else { +- fuse_reply_entry(req, &e); ++ fuse_reply_entry(req, &e, lo_data(req)->shared); + } + } + +@@ -1056,6 +1178,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + ++ update_version(lo, dir); ++ + saverr = lo_do_lookup(req, parent, name, &e); + if (saverr) { + goto out; +@@ -1064,7 +1188,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, + name, (unsigned long long)e.ino); + +- fuse_reply_entry(req, &e); ++ 
fuse_reply_entry(req, &e, lo->shared); + lo_inode_put(lo, &dir); + return; + +@@ -1134,11 +1258,13 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + inode->nlookup++; + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; ++ update_version(lo, inode); ++ update_version(lo, parent_inode); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, + name, (unsigned long long)e.ino); + +- fuse_reply_entry(req, &e); ++ fuse_reply_entry(req, &e, lo->shared); + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &inode); + return; +@@ -1192,8 +1318,21 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + } + + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ if (res == -1) { ++ fuse_reply_err(req, errno); ++ } else { ++ struct lo_inode *parent_inode; + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ update_version(lo, inode); ++ ++ parent_inode = lo_inode(req, parent); ++ if (parent_inode) { ++ update_version(lo, parent_inode); ++ lo_inode_put(lo, &parent_inode); ++ } ++ ++ fuse_reply_err(req, 0); ++ } + unref_inode_lolocked(lo, inode, 1); + lo_inode_put(lo, &inode); + } +@@ -1245,8 +1384,18 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); ++ if (res == -1) { ++ fuse_reply_err(req, errno); ++ } else { ++ update_version(lo, oldinode); ++ if (newinode) { ++ update_version(lo, newinode); ++ } ++ update_version(lo, parent_inode); ++ update_version(lo, newparent_inode); ++ fuse_reply_err(req, 0); ++ } + +- fuse_reply_err(req, res == -1 ? 
errno : 0); + out: + unref_inode_lolocked(lo, oldinode, 1); + unref_inode_lolocked(lo, newinode, 1); +@@ -1274,8 +1423,21 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + } + + res = unlinkat(lo_fd(req, parent), name, 0); ++ if (res == -1) { ++ fuse_reply_err(req, errno); ++ } else { ++ struct lo_inode *parent_inode; + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ update_version(lo, inode); ++ ++ parent_inode = lo_inode(req, parent); ++ if (parent_inode) { ++ update_version(lo, parent_inode); ++ lo_inode_put(lo, &parent_inode); ++ } ++ ++ fuse_reply_err(req, 0); ++ } + unref_inode_lolocked(lo, inode, 1); + lo_inode_put(lo, &inode); + } +@@ -1690,6 +1852,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + if (!err) { + ssize_t fh; + ++ update_version(lo, parent_inode); ++ + pthread_mutex_lock(&lo->mutex); + fh = lo_add_fd_mapping(req, fd); + pthread_mutex_unlock(&lo->mutex); +@@ -1714,7 +1878,7 @@ out: + if (err) { + fuse_reply_err(req, err); + } else { +- fuse_reply_create(req, &e, fi); ++ fuse_reply_create(req, &e, fi, lo->shared); + } + } + +@@ -2041,6 +2205,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + (void)ino; + ssize_t res; + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ struct lo_data *lo = lo_data(req); + bool cap_fsetid_dropped = false; + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +@@ -2067,6 +2232,14 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + if (res < 0) { + fuse_reply_err(req, -res); + } else { ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (inode) { ++ update_version(lo, inode); ++ lo_inode_put(lo, &inode); ++ } ++ + fuse_reply_write(req, (size_t)res); + } + +@@ -2095,6 +2268,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + off_t length, struct fuse_file_info *fi) + { + int err = EOPNOTSUPP; ++ struct lo_data *lo = lo_data(req); + (void)ino; + + 
#ifdef CONFIG_FALLOCATE +@@ -2112,6 +2286,16 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + err = posix_fallocate(lo_fi_fd(req, fi), offset, length); + #endif + ++ if (!err) { ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (inode) { ++ update_version(lo, inode); ++ lo_inode_put(lo, &inode); ++ } ++ } ++ + fuse_reply_err(req, err); + } + +@@ -2754,6 +2938,9 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, + + saverr = ret == -1 ? errno : 0; + ++ if (!saverr) { ++ update_version(lo, inode); ++ } + out: + if (fd >= 0) { + close(fd); +@@ -2820,6 +3007,9 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name) + + saverr = ret == -1 ? errno : 0; + ++ if (!saverr) { ++ update_version(lo, inode); ++ } + out: + if (fd >= 0) { + close(fd); +@@ -3474,6 +3664,101 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + } + ++static void *ireg_do(void *data) ++{ ++ struct lo_data *lo = data; ++ int res; ++ char buf[100]; ++ struct srv_msg reply; ++ struct msgreply *rep; ++ ++ for (;;) { ++ res = read(lo->ireg_sock, buf, sizeof(buf)); ++ if (res <= 0) { ++ if (res == -1) { ++ fuse_log(FUSE_LOG_WARNING, "read(lo->ireg_sock, ...): %m\n"); ++ } else { ++ fuse_log(FUSE_LOG_WARNING, "disconnected from ireg\n"); ++ } ++ return NULL; ++ } ++ if (res != sizeof(reply)) { ++ fuse_log(FUSE_LOG_WARNING, "bad size message: %i\n", res); ++ continue; ++ } ++ ++ memcpy(&reply, buf, sizeof(reply)); ++ if (reply.op != SRV_VERSION) { ++ fuse_log(FUSE_LOG_WARNING, "bad reply to IREG_GET: %i\n", reply.op); ++ continue; ++ } ++ ++ rep = (struct msgreply *)(uintptr_t)reply.handle; ++ rep->inode->version_offset = reply.version.offset; ++ rep->inode->ireg_refid = reply.version.refid; ++ sem_post(&rep->ready); ++ } ++} ++ ++static void setup_shared_versions(struct lo_data *lo) ++{ ++ int fd, sock, res; ++ const char *version_path = 
"/dev/shm/fuse_shared_versions"; ++ struct stat stat; ++ struct sockaddr_un name = { .sun_family = AF_UNIX }; ++ const char *socket_name = "/tmp/ireg.sock"; ++ void *addr; ++ ++ lo->ireg_sock = -1; ++ if (!lo->shared) { ++ return; ++ } ++ ++ sock = socket(AF_UNIX, SOCK_SEQPACKET, 0); ++ if (sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "socket(AF_UNIX, SOCK_SEQPACKET, 0): %m\n"); ++ exit(1); ++ } ++ ++ strncpy(name.sun_path, socket_name, sizeof(name.sun_path) - 1); ++ ++ res = connect(sock, (const struct sockaddr *)&name, ++ sizeof(struct sockaddr_un)); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_WARNING, "connect to ireg: %m\n"); ++ close(sock); ++ lo->ireg_sock = -1; ++ return; ++ } ++ ++ lo->ireg_sock = sock; ++ ++ fd = open(version_path, O_RDWR); ++ if (sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(%s, O_RDWR): %m\n", version_path); ++ exit(1); ++ } ++ ++ res = fstat(fd, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fstat(%i, &stat): %m\n", fd); ++ exit(1); ++ } ++ ++ lo->version_table_size = stat.st_size / sizeof(lo->version_table[0]); ++ ++ addr = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ++ if (addr == MAP_FAILED) { ++ fuse_log( ++ FUSE_LOG_ERR, ++ "mmap(NULL, %li, PROT_READ | PROT_WRITE, MAP_SHARED, %i, 0): %m\n", ++ stat.st_size, fd); ++ exit(1); ++ } ++ ++ lo->version_table = addr; ++} ++ + static void setup_root(struct lo_data *lo, struct lo_inode *root) + { + int fd, res; +@@ -3688,6 +3973,7 @@ int main(int argc, char *argv[]) + + lo.use_statx = true; + ++ setup_shared_versions(&lo); + se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); + if (se == NULL) { + goto err_out1; +@@ -3711,9 +3997,24 @@ int main(int argc, char *argv[]) + setup_sandbox(&lo, se, opts.syslog); + + setup_root(&lo, &lo.root); ++ ++ if (lo.ireg_sock != -1) { ++ pthread_t ireg_thread; ++ ++ ret = pthread_create(&ireg_thread, NULL, ireg_do, &lo); ++ if (ret) { ++ fuse_log(FUSE_LOG_WARNING, "pthread_create: %s\n", strerror(ret)); ++ ret = 1; ++ goto 
err_out4; ++ } ++ ++ get_shared(&lo, &lo.root); ++ } ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + ++err_out4: + fuse_session_unmount(se); + cleanup_capng(); + err_out3: +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0028-virtio-fs-Allow-mapping-of-meta-data-version-table.patch b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0028-virtio-fs-Allow-mapping-of-meta-data-version-table.patch new file mode 100644 index 0000000000..c4af4e2149 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0028-virtio-fs-Allow-mapping-of-meta-data-version-table.patch @@ -0,0 +1,167 @@ +From 119990ab3a30564c7e44f4e39344be48fc998f26 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 27 Jul 2018 10:36:41 +0100 +Subject: [PATCH 28/29] virtio-fs: Allow mapping of meta data version table + +The 'meta data version table' is a block of shared memory mapped between +multiple QEMUs and fuse daemons, so that they can be informed +of metadata updates. It's typically a shmfs file, and +it's specified as : + + -device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=1G,versiontable=/dev/shm/mdvt1 + +It gets mapped into the PCI bar after the data cache; it's read only. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/virtio/vhost-user-fs-pci.c | 16 +++++++++-- + hw/virtio/vhost-user-fs.c | 32 ++++++++++++++++++++++ + include/hw/virtio/vhost-user-fs.h | 4 +++ + include/standard-headers/linux/virtio_fs.h | 1 + + 4 files changed, 51 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c +index 19aaa8d722..aad0128fa5 100644 +--- a/hw/virtio/vhost-user-fs-pci.c ++++ b/hw/virtio/vhost-user-fs-pci.c +@@ -42,6 +42,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + VHostUserFSPCI *dev = VHOST_USER_FS_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + uint64_t cachesize; ++ uint64_t totalsize; + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + /* Also reserve config change and hiprio queue vectors */ +@@ -51,18 +52,29 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + cachesize = dev->vdev.conf.cache_size; + ++ /* PCIe bar needs to be a power of 2 */ ++ totalsize = pow2ceil(cachesize + dev->vdev.mdvt_size); ++ + /* + * The bar starts with the data/DAX cache +- * Others will be added later. ++ * followed by the metadata cache. 
+ */ + memory_region_init(&dev->cachebar, OBJECT(vpci_dev), +- "vhost-fs-pci-cachebar", cachesize); ++ "vhost-fs-pci-cachebar", totalsize); + if (cachesize) { + memory_region_add_subregion(&dev->cachebar, 0, &dev->vdev.cache); + virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, 0, cachesize, + VIRTIO_FS_SHMCAP_ID_CACHE); + } + ++ if (dev->vdev.mdvt_size) { ++ memory_region_add_subregion(&dev->cachebar, cachesize, ++ &dev->vdev.mdvt); ++ virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, ++ cachesize, dev->vdev.mdvt_size, ++ VIRTIO_FS_SHMCAP_ID_VERTAB); ++ } ++ + /* After 'realized' so the memory region exists */ + pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR, + PCI_BASE_ADDRESS_SPACE_MEMORY | +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index b43725824f..fb16db7e0d 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -432,6 +432,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) + unsigned int i; + size_t len; + int ret; ++ int mdvtfd = -1; + + if (!fs->conf.chardev.chr) { + error_setg(errp, "missing chardev"); +@@ -475,6 +476,28 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) + "no smaller than the page size"); + return; + } ++ if (fs->conf.mdvtpath) { ++ struct stat statbuf; ++ ++ mdvtfd = open(fs->conf.mdvtpath, O_RDWR); ++ if (mdvtfd < 0) { ++ error_setg_errno(errp, errno, ++ "Failed to open meta-data version table '%s'", ++ fs->conf.mdvtpath); ++ ++ return; ++ } ++ if (fstat(mdvtfd, &statbuf) == -1) { ++ error_setg_errno(errp, errno, ++ "Failed to stat meta-data version table '%s'", ++ fs->conf.mdvtpath); ++ close(mdvtfd); ++ return; ++ } ++ ++ fs->mdvt_size = statbuf.st_size; ++ } ++ + if (fs->conf.cache_size) { + /* Anonymous, private memory is not counted as overcommit */ + cache_ptr = mmap(NULL, fs->conf.cache_size, DAX_WINDOW_PROT, +@@ -489,6 +512,14 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) + fs->conf.cache_size, 
cache_ptr); + } + ++ if (mdvtfd >= 0) { /* -1: no versiontable was configured */ ++ memory_region_init_ram_from_fd(&fs->mdvt, OBJECT(vdev), ++ "virtio-fs-mdvt", ++ fs->mdvt_size, true, mdvtfd, NULL); ++ /* The version table is read-only by the guest */ ++ memory_region_set_readonly(&fs->mdvt, true); ++ } ++ + if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) { + return; + } +@@ -564,6 +595,7 @@ static Property vuf_properties[] = { + conf.num_request_queues, 1), + DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), + DEFINE_PROP_SIZE("cache-size", VHostUserFS, conf.cache_size, 0), ++ DEFINE_PROP_STRING("versiontable", VHostUserFS, conf.mdvtpath), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 845cdb0177..83015ac0fc 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -47,6 +47,7 @@ typedef struct { + uint16_t num_request_queues; + uint16_t queue_size; + uint64_t cache_size; ++ char *mdvtpath; + } VHostUserFSConf; + + struct VHostUserFS { +@@ -61,6 +62,9 @@ struct VHostUserFS { + + /*< public >*/ + MemoryRegion cache; ++ /* Metadata version table */ ++ size_t mdvt_size; ++ MemoryRegion mdvt; + }; + + /* Callbacks from the vhost-user code for slave commands */ +diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h +index 808aa3a402..a17b5172a8 100644 +--- a/include/standard-headers/linux/virtio_fs.h ++++ b/include/standard-headers/linux/virtio_fs.h +@@ -18,6 +18,7 @@ struct virtio_fs_config { + + /* For the id field in virtio_pci_shm_cap */ + #define VIRTIO_FS_SHMCAP_ID_CACHE 0 ++#define VIRTIO_FS_SHMCAP_ID_VERTAB 1 + + #define VIRTIO_FS_PCI_CACHE_BAR 2 + +-- +2.25.1 + diff --git a/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0029-virtiofsd-Add-printf-checking-to-fuse_log.patch 
b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0029-virtiofsd-Add-printf-checking-to-fuse_log.patch new file mode 100644 index 0000000000..5b593014e1 --- /dev/null +++ b/tools/packaging/qemu/patches/tag_patches/470dd6bd360782f5137f7e3376af6a44658eb1d3/0029-virtiofsd-Add-printf-checking-to-fuse_log.patch @@ -0,0 +1,35 @@ +From e2a3c273639368221dae39a7f230a46d0a580e4d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 21 Jan 2020 10:20:14 +0000 +Subject: [PATCH 29/29] virtiofsd: Add printf checking to fuse_log + +Use qemu's GCC_FMT_ATTR to add printf style checking to fuse_log. + +Signed-off-by: Dr. David Alan Gilbert +--- + tools/virtiofsd/fuse_log.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 8d7091bd4d..5c2df71603 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -14,6 +14,7 @@ + * This file defines the logging interface of FUSE + */ + ++#include "qemu/compiler.h" + + /** + * Log severity level +@@ -68,6 +69,7 @@ void fuse_set_log_func(fuse_log_func_t func); + * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) + * @param fmt sprintf-style format string including newline + */ ++GCC_FMT_ATTR(2,3) + void fuse_log(enum fuse_log_level level, const char *fmt, ...); + + #endif /* FUSE_LOG_H_ */ +-- +2.25.1 + diff --git a/versions.yaml b/versions.yaml index e382a2ebb2..23c27fc57b 100644 --- a/versions.yaml +++ b/versions.yaml @@ -107,8 +107,8 @@ assets: qemu-experimental: description: "QEMU with virtiofs support" - url: "https://gitlab.com/virtio-fs/qemu" - version: "qemu5.0-virtiofs-with51bits-dax" + url: "https://github.com/qemu/qemu" + version: "470dd6bd360782f5137f7e3376af6a44658eb1d3" image: description: |