mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-26 15:32:30 +00:00
versions: qemu-experimental: 6.0~rc 470dd6
Move to the next 6.0 dev tree for qemu experimental; the qemu version has the same base as: https://gitlab.com/virtio-fs/qemu/-/commits/virtio-fs-dev/ Using qemu 6.0-rc1, some patches do not apply. Fixes: #1624 Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
This commit is contained in:
parent
6491b9d7aa
commit
f365bdb7cf
@ -0,0 +1,270 @@
|
|||||||
|
From d14a6cb000d0a5f9e382e5e5de0021756034d0cb Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Thu, 2 May 2019 18:04:04 +0100
|
||||||
|
Subject: [PATCH 01/29] DAX: libvhost-user: Allow popping a queue element with
|
||||||
|
bad pointers
|
||||||
|
|
||||||
|
Allow a daemon implemented with libvhost-user to accept an
|
||||||
|
element with pointers to memory that aren't in the mapping table.
|
||||||
|
The daemon might have some special way to deal with some special
|
||||||
|
cases of this.
|
||||||
|
|
||||||
|
The default behaviour doesn't change.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
contrib/vhost-user-blk/vhost-user-blk.c | 3 +-
|
||||||
|
contrib/vhost-user-gpu/vhost-user-gpu.c | 5 ++-
|
||||||
|
contrib/vhost-user-input/main.c | 4 +-
|
||||||
|
contrib/vhost-user-scsi/vhost-user-scsi.c | 2 +-
|
||||||
|
subprojects/libvhost-user/libvhost-user.c | 51 ++++++++++++++++++-----
|
||||||
|
subprojects/libvhost-user/libvhost-user.h | 8 +++-
|
||||||
|
tests/vhost-user-bridge.c | 4 +-
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 3 +-
|
||||||
|
8 files changed, 59 insertions(+), 21 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
|
||||||
|
index d14b2896bf..01193552e9 100644
|
||||||
|
--- a/contrib/vhost-user-blk/vhost-user-blk.c
|
||||||
|
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
|
||||||
|
@@ -235,7 +235,8 @@ static int vub_virtio_process_req(VubDev *vdev_blk,
|
||||||
|
unsigned out_num;
|
||||||
|
VubReq *req;
|
||||||
|
|
||||||
|
- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
|
||||||
|
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq),
|
||||||
|
+ NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c
|
||||||
|
index f445ef28ec..58161a4378 100644
|
||||||
|
--- a/contrib/vhost-user-gpu/vhost-user-gpu.c
|
||||||
|
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
|
||||||
|
@@ -819,7 +819,8 @@ vg_handle_ctrl(VuDev *dev, int qidx)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command));
|
||||||
|
+ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command),
|
||||||
|
+ NULL, NULL);
|
||||||
|
if (!cmd) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -922,7 +923,7 @@ vg_handle_cursor(VuDev *dev, int qidx)
|
||||||
|
struct virtio_gpu_update_cursor cursor;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
|
||||||
|
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c
|
||||||
|
index c15d18c33f..d5c435605c 100644
|
||||||
|
--- a/contrib/vhost-user-input/main.c
|
||||||
|
+++ b/contrib/vhost-user-input/main.c
|
||||||
|
@@ -57,7 +57,7 @@ static void vi_input_send(VuInput *vi, struct virtio_input_event *event)
|
||||||
|
|
||||||
|
/* ... then check available space ... */
|
||||||
|
for (i = 0; i < vi->qindex; i++) {
|
||||||
|
- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
|
||||||
|
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
while (--i >= 0) {
|
||||||
|
vu_queue_unpop(dev, vq, vi->queue[i].elem, 0);
|
||||||
|
@@ -141,7 +141,7 @@ static void vi_handle_sts(VuDev *dev, int qidx)
|
||||||
|
g_debug("%s", G_STRFUNC);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
|
||||||
|
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c
|
||||||
|
index 4f6e3e2a24..7564d6ab2d 100644
|
||||||
|
--- a/contrib/vhost-user-scsi/vhost-user-scsi.c
|
||||||
|
+++ b/contrib/vhost-user-scsi/vhost-user-scsi.c
|
||||||
|
@@ -252,7 +252,7 @@ static void vus_proc_req(VuDev *vu_dev, int idx)
|
||||||
|
VirtIOSCSICmdReq *req;
|
||||||
|
VirtIOSCSICmdResp *rsp;
|
||||||
|
|
||||||
|
- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
|
||||||
|
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement), NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
g_debug("No more elements pending on vq[%d]@%p", idx, vq);
|
||||||
|
break;
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
index fab7ca17ee..3b1b5c385f 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
@@ -2461,7 +2461,8 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
|
||||||
|
|
||||||
|
static bool
|
||||||
|
virtqueue_map_desc(VuDev *dev,
|
||||||
|
- unsigned int *p_num_sg, struct iovec *iov,
|
||||||
|
+ unsigned int *p_num_sg, unsigned int *p_bad_sg,
|
||||||
|
+ struct iovec *iov,
|
||||||
|
unsigned int max_num_sg, bool is_write,
|
||||||
|
uint64_t pa, size_t sz)
|
||||||
|
{
|
||||||
|
@@ -2482,10 +2483,35 @@ virtqueue_map_desc(VuDev *dev,
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
- iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
|
||||||
|
- if (iov[num_sg].iov_base == NULL) {
|
||||||
|
- vu_panic(dev, "virtio: invalid address for buffers");
|
||||||
|
- return false;
|
||||||
|
+ if (p_bad_sg && *p_bad_sg) {
|
||||||
|
+ /* A previous mapping was bad, we won't try and map this either */
|
||||||
|
+ *p_bad_sg = *p_bad_sg + 1;
|
||||||
|
+ }
|
||||||
|
+ if (!p_bad_sg || !*p_bad_sg) {
|
||||||
|
+ /* No bad mappings so far, let's try mapping this one */
|
||||||
|
+ iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
|
||||||
|
+ if (iov[num_sg].iov_base == NULL) {
|
||||||
|
+ /*
|
||||||
|
+ * OK, it won't map, either panic or if the caller can handle
|
||||||
|
+ * it, then count it.
|
||||||
|
+ */
|
||||||
|
+ if (!p_bad_sg) {
|
||||||
|
+ vu_panic(dev, "virtio: invalid address for buffers");
|
||||||
|
+ return false;
|
||||||
|
+ } else {
|
||||||
|
+ *p_bad_sg = *p_bad_sg + 1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (p_bad_sg && *p_bad_sg) {
|
||||||
|
+ /*
|
||||||
|
+ * There was a bad mapping, either now or previously, since
|
||||||
|
+ * the caller set p_bad_sg it means it's prepared to deal with
|
||||||
|
+ * it, so give it the pa in the iov
|
||||||
|
+ * Note: In this case len will be the whole sz, so we won't
|
||||||
|
+ * go around again for this descriptor
|
||||||
|
+ */
|
||||||
|
+ iov[num_sg].iov_base = (void *)(uintptr_t)pa;
|
||||||
|
}
|
||||||
|
iov[num_sg].iov_len = len;
|
||||||
|
num_sg++;
|
||||||
|
@@ -2516,7 +2542,8 @@ virtqueue_alloc_element(size_t sz,
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *
|
||||||
|
-vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
|
||||||
|
+vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz,
|
||||||
|
+ unsigned int *p_bad_in, unsigned int *p_bad_out)
|
||||||
|
{
|
||||||
|
struct vring_desc *desc = vq->vring.desc;
|
||||||
|
uint64_t desc_addr, read_len;
|
||||||
|
@@ -2560,7 +2587,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
|
||||||
|
/* Collect all the descriptors */
|
||||||
|
do {
|
||||||
|
if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
|
||||||
|
- if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
|
||||||
|
+ if (!virtqueue_map_desc(dev, &in_num, p_bad_in, iov + out_num,
|
||||||
|
VIRTQUEUE_MAX_SIZE - out_num, true,
|
||||||
|
le64toh(desc[i].addr),
|
||||||
|
le32toh(desc[i].len))) {
|
||||||
|
@@ -2571,7 +2598,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
|
||||||
|
vu_panic(dev, "Incorrect order for descriptors");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
- if (!virtqueue_map_desc(dev, &out_num, iov,
|
||||||
|
+ if (!virtqueue_map_desc(dev, &out_num, p_bad_out, iov,
|
||||||
|
VIRTQUEUE_MAX_SIZE, false,
|
||||||
|
le64toh(desc[i].addr),
|
||||||
|
le32toh(desc[i].len))) {
|
||||||
|
@@ -2661,7 +2688,8 @@ vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
-vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
|
||||||
|
+vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz,
|
||||||
|
+ unsigned int *p_bad_in, unsigned int *p_bad_out)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
unsigned int head;
|
||||||
|
@@ -2674,7 +2702,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
|
||||||
|
|
||||||
|
if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
|
||||||
|
i = (--vq->resubmit_num);
|
||||||
|
- elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);
|
||||||
|
+ elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz,
|
||||||
|
+ p_bad_in, p_bad_out);
|
||||||
|
|
||||||
|
if (!vq->resubmit_num) {
|
||||||
|
free(vq->resubmit_list);
|
||||||
|
@@ -2706,7 +2735,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
|
||||||
|
vring_set_avail_event(vq, vq->last_avail_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
- elem = vu_queue_map_desc(dev, vq, head, sz);
|
||||||
|
+ elem = vu_queue_map_desc(dev, vq, head, sz, p_bad_in, p_bad_out);
|
||||||
|
|
||||||
|
if (!elem) {
|
||||||
|
return NULL;
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
index 7d47f1364a..f0aca2b216 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
@@ -589,11 +589,17 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
|
||||||
|
* @dev: a VuDev context
|
||||||
|
* @vq: a VuVirtq queue
|
||||||
|
* @sz: the size of struct to return (must be >= VuVirtqElement)
|
||||||
|
+ * @p_bad_in: If non-NULL, a pointer to an integer count of
|
||||||
|
+ * unmappable regions in input descriptors
|
||||||
|
+ * @p_bad_out: If non-NULL, a pointer to an integer count of
|
||||||
|
+ * unmappable regions in output descriptors
|
||||||
|
+ *
|
||||||
|
*
|
||||||
|
* Returns: a VuVirtqElement filled from the queue or NULL. The
|
||||||
|
* returned element must be free()-d by the caller.
|
||||||
|
*/
|
||||||
|
-void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
|
||||||
|
+void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz,
|
||||||
|
+ unsigned int *p_bad_in, unsigned int *p_bad_out);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
|
||||||
|
index 24815920b2..4f6829e6c3 100644
|
||||||
|
--- a/tests/vhost-user-bridge.c
|
||||||
|
+++ b/tests/vhost-user-bridge.c
|
||||||
|
@@ -184,7 +184,7 @@ vubr_handle_tx(VuDev *dev, int qidx)
|
||||||
|
unsigned int out_num;
|
||||||
|
struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
|
||||||
|
|
||||||
|
- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
|
||||||
|
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -299,7 +299,7 @@ vubr_backend_recv_cb(int sock, void *ctx)
|
||||||
|
ssize_t ret, total = 0;
|
||||||
|
unsigned int num;
|
||||||
|
|
||||||
|
- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
|
||||||
|
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement), NULL, NULL);
|
||||||
|
if (!elem) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index ddcefee427..bd19358437 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -657,7 +657,8 @@ static void *fv_queue_thread(void *opaque)
|
||||||
|
__func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
- FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest));
|
||||||
|
+ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest),
|
||||||
|
+ NULL, NULL);
|
||||||
|
if (!req) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,36 @@
|
|||||||
|
From da5d60ab13c9e31f775b34d7afe6d82fca7f2336 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wainer dos Santos Moschetta <wainersm@redhat.com>
|
||||||
|
Date: Tue, 2 Feb 2021 13:46:24 -0500
|
||||||
|
Subject: [PATCH] virtiofsd: Allow to build it without the tools
|
||||||
|
|
||||||
|
This changes the Meson build script to allow virtiofsd to be built even
|
||||||
|
though the tools build is disabled, thus honoring the --enable-virtiofsd
|
||||||
|
option.
|
||||||
|
|
||||||
|
(Backport of commit xxxxxx)
|
||||||
|
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
|
||||||
|
---
|
||||||
|
tools/meson.build | 7 +++++--
|
||||||
|
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/meson.build b/tools/meson.build
|
||||||
|
index fdce66857d..3e5a0abfa2 100644
|
||||||
|
--- a/tools/meson.build
|
||||||
|
+++ b/tools/meson.build
|
||||||
|
@@ -10,8 +10,11 @@ if get_option('virtiofsd').enabled()
|
||||||
|
error('virtiofsd requires Linux')
|
||||||
|
elif not seccomp.found() or not libcap_ng.found()
|
||||||
|
error('virtiofsd requires libcap-ng-devel and seccomp-devel')
|
||||||
|
- elif not have_tools or 'CONFIG_VHOST_USER' not in config_host
|
||||||
|
- error('virtiofsd needs tools and vhost-user support')
|
||||||
|
+ elif 'CONFIG_VHOST_USER' not in config_host
|
||||||
|
+ error('virtiofsd needs vhost-user support')
|
||||||
|
+ else
|
||||||
|
+ # Disabled all the tools but virtiofsd.
|
||||||
|
+ have_virtiofsd = true
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
elif get_option('virtiofsd').disabled() or not have_system
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,155 @@
|
|||||||
|
From bb506adc3bc3e3c0cad695b3bab126afdc3f0536 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 16 May 2019 15:11:35 +0100
|
||||||
|
Subject: [PATCH 02/29] virtiofsd: add security guide document
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Many people want to know: what's up with virtiofsd and security? This
|
||||||
|
document provides the answers!
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||||
|
---
|
||||||
|
docs/tools/index.rst | 1 +
|
||||||
|
docs/tools/virtiofsd-security.rst | 118 ++++++++++++++++++++++++++++++
|
||||||
|
2 files changed, 119 insertions(+)
|
||||||
|
create mode 100644 docs/tools/virtiofsd-security.rst
|
||||||
|
|
||||||
|
diff --git a/docs/tools/index.rst b/docs/tools/index.rst
|
||||||
|
index 3a5829c17a..d5b65f803b 100644
|
||||||
|
--- a/docs/tools/index.rst
|
||||||
|
+++ b/docs/tools/index.rst
|
||||||
|
@@ -17,3 +17,4 @@ Contents:
|
||||||
|
qemu-trace-stap
|
||||||
|
virtfs-proxy-helper
|
||||||
|
virtiofsd
|
||||||
|
+ virtiofsd-security
|
||||||
|
diff --git a/docs/tools/virtiofsd-security.rst b/docs/tools/virtiofsd-security.rst
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..61ce551344
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/docs/tools/virtiofsd-security.rst
|
||||||
|
@@ -0,0 +1,118 @@
|
||||||
|
+========================
|
||||||
|
+Virtiofsd Security Guide
|
||||||
|
+========================
|
||||||
|
+
|
||||||
|
+Introduction
|
||||||
|
+============
|
||||||
|
+This document covers security topics for users of virtiofsd, the daemon that
|
||||||
|
+implements host<->guest file system sharing. Sharing files between one or more
|
||||||
|
+guests and the host raises questions about the trust relationships between
|
||||||
|
+these entities. By understanding these topics users can safely deploy
|
||||||
|
+virtiofsd and control access to their data.
|
||||||
|
+
|
||||||
|
+Architecture
|
||||||
|
+============
|
||||||
|
+The virtiofsd daemon process acts as a vhost-user device backend, implementing
|
||||||
|
+the virtio-fs device that the corresponding device driver inside the guest
|
||||||
|
+interacts with.
|
||||||
|
+
|
||||||
|
+There is one virtiofsd process per virtio-fs device instance. For example,
|
||||||
|
+when two guests have access to the same shared directory there are still two
|
||||||
|
+virtiofsd processes since there are two virtio-fs device instances. Similarly,
|
||||||
|
+if one guest has access to two shared directories, there are two virtiofsd
|
||||||
|
+processes since there are two virtio-fs device instances.
|
||||||
|
+
|
||||||
|
+Files are created on the host with uid/gid values provided by the guest.
|
||||||
|
+Furthermore, virtiofsd is unable to enforce file permissions since guests have
|
||||||
|
+the ability to access any file within the shared directory. File permissions
|
||||||
|
+are implemented in the guest, just like with traditional local file systems.
|
||||||
|
+
|
||||||
|
+Security Requirements
|
||||||
|
+=====================
|
||||||
|
+Guests have root access to the shared directory. This is necessary for root
|
||||||
|
+file systems on virtio-fs and similar use cases.
|
||||||
|
+
|
||||||
|
+When multiple guests have access to the same shared directory, the guests have
|
||||||
|
+a trust relationship. A broken or malicious guest could delete or corrupt
|
||||||
|
+files. It could exploit symlink or time-of-check-to-time-of-use (TOCTOU) race
|
||||||
|
+conditions against applications in other guests. It could plant device nodes
|
||||||
|
+or setuid executables to gain privileges in other guests. It could perform
|
||||||
|
+denial-of-service (DoS) attacks by consuming available space or making the file
|
||||||
|
+system unavailable to other guests.
|
||||||
|
+
|
||||||
|
+Guests are restricted to the shared directory and cannot access other files on
|
||||||
|
+the host.
|
||||||
|
+
|
||||||
|
+Guests should not be able to gain arbitrary code execution inside the virtiofsd
|
||||||
|
+process. If they do, the process is sandboxed to prevent escaping into other
|
||||||
|
+parts of the host.
|
||||||
|
+
|
||||||
|
+Daemon Sandboxing
|
||||||
|
+=================
|
||||||
|
+The virtiofsd process handles virtio-fs FUSE requests from the untrusted guest.
|
||||||
|
+This attack surface could give the guest access to host resources and must
|
||||||
|
+therefore be protected. Sandboxing mechanisms are integrated into virtiofsd to
|
||||||
|
+reduce the impact in the event that an attacker gains control of the process.
|
||||||
|
+
|
||||||
|
+As a general rule, virtiofsd does not trust inputs from the guest, aside from
|
||||||
|
+uid/gid values. Input validation is performed so that the guest cannot corrupt
|
||||||
|
+memory or otherwise gain arbitrary code execution in the virtiofsd process.
|
||||||
|
+
|
||||||
|
+Sandboxing adds restrictions on the virtiofsd so that even if an attacker is
|
||||||
|
+able to exploit a bug, they will be constrained to the virtiofsd process and
|
||||||
|
+unable to cause damage on the host.
|
||||||
|
+
|
||||||
|
+Seccomp Whitelist
|
||||||
|
+-----------------
|
||||||
|
+Many system calls are not required by virtiofsd to perform its function. For
|
||||||
|
+example, ptrace(2) and execve(2) are not necessary and attackers are likely to
|
||||||
|
+use them to further compromise the system. This is prevented using a seccomp
|
||||||
|
+whitelist in virtiofsd.
|
||||||
|
+
|
||||||
|
+During startup virtiofsd installs a whitelist of allowed system calls. All
|
||||||
|
+other system calls are forbidden for the remaining lifetime of the process.
|
||||||
|
+This list has been built through experience of running virtiofsd on several
|
||||||
|
+flavors of Linux and observing which system calls were encountered.
|
||||||
|
+
|
||||||
|
+It is possible that previously unexplored code paths or newer library versions
|
||||||
|
+will invoke system calls that have not been whitelisted yet. In this case the
|
||||||
|
+process terminates and a seccomp error is captured in the audit log. The log
|
||||||
|
+can typically be viewed using ``journalctl -xe`` and searching for ``SECCOMP``.
|
||||||
|
+
|
||||||
|
+Should it be necessary to extend the whitelist, system call numbers from the
|
||||||
|
+audit log can be translated to names through a CPU architecture-specific
|
||||||
|
+``.tbl`` file in the Linux source tree. They can then be added to the
|
||||||
|
+whitelist in ``seccomp.c`` in the virtiofsd source tree.
|
||||||
|
+
|
||||||
|
+Mount Namespace
|
||||||
|
+---------------
|
||||||
|
+During startup virtiofsd enters a new mount namespace and releases all mounts
|
||||||
|
+except for the shared directory. This makes the file system root `/` the
|
||||||
|
+shared directory. It is impossible to access files outside the shared
|
||||||
|
+directory since they cannot be looked up by path resolution.
|
||||||
|
+
|
||||||
|
+Several attacks, including `..` traversal and symlink escapes, are prevented by
|
||||||
|
+the mount namespace.
|
||||||
|
+
|
||||||
|
+The current virtiofsd implementation keeps a directory file descriptor to
|
||||||
|
+/proc/self/fd open in order to implement several FUSE requests. This file
|
||||||
|
+descriptor could be used by attackers to access files outside the shared
|
||||||
|
+directory. This limitation will be addressed in a future release of virtiofsd.
|
||||||
|
+
|
||||||
|
+Other Namespaces
|
||||||
|
+----------------
|
||||||
|
+Virtiofsd enters new pid and network namespaces during startup. The pid
|
||||||
|
+namespace prevents the process from seeing other processes running on the host.
|
||||||
|
+The network namespace removes network connectivity from the process.
|
||||||
|
+
|
||||||
|
+Deployment Best Practices
|
||||||
|
+=========================
|
||||||
|
+The shared directory should be a separate file system so that untrusted guests
|
||||||
|
+cannot cause a denial-of-service by using up all available inodes or exhausting
|
||||||
|
+free space.
|
||||||
|
+
|
||||||
|
+If the shared directory is also accessible from a host mount namespace, it is
|
||||||
|
+recommended to keep a parent directory with rwx------ permissions so that other
|
||||||
|
+users on the host are unable to access any setuid executables or device nodes
|
||||||
|
+in the shared directory. The `nosuid` and `nodev` mount options can also be
|
||||||
|
+used to prevent this issue.
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,110 @@
|
|||||||
|
From 800ce0d08e09320ac2f1bd9125cb07d14a2689fe Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Thu, 7 Feb 2019 18:39:31 +0000
|
||||||
|
Subject: [PATCH 03/29] DAX contrib/libvhost-user: Add virtio-fs slave types
|
||||||
|
|
||||||
|
Add virtio-fs definitions to libvhost-user
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
subprojects/libvhost-user/libvhost-user.c | 32 +++++++++++++++++++++++
|
||||||
|
subprojects/libvhost-user/libvhost-user.h | 31 ++++++++++++++++++++++
|
||||||
|
2 files changed, 63 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
index 3b1b5c385f..9b8223b5d5 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
@@ -2910,3 +2910,35 @@ vu_queue_push(VuDev *dev, VuVirtq *vq,
|
||||||
|
vu_queue_flush(dev, vq, 1);
|
||||||
|
vu_queue_inflight_post_put(dev, vq, elem->index);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
+ VhostUserFSSlaveMsg *fsm)
|
||||||
|
+{
|
||||||
|
+ int fd_num = 0;
|
||||||
|
+ VhostUserMsg vmsg = {
|
||||||
|
+ .request = req,
|
||||||
|
+ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
|
||||||
|
+ .size = sizeof(vmsg.payload.fs),
|
||||||
|
+ .payload.fs = *fsm,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ if (fd != -1) {
|
||||||
|
+ vmsg.fds[fd_num++] = fd;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vmsg.fd_num = fd_num;
|
||||||
|
+
|
||||||
|
+ if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ pthread_mutex_lock(&dev->slave_mutex);
|
||||||
|
+ if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
|
||||||
|
+ pthread_mutex_unlock(&dev->slave_mutex);
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Also unlocks the slave_mutex */
|
||||||
|
+ return vu_process_message_reply(dev, &vmsg);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
index f0aca2b216..f3b0998eea 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
@@ -122,6 +122,24 @@ typedef enum VhostUserSlaveRequest {
|
||||||
|
VHOST_USER_SLAVE_MAX
|
||||||
|
} VhostUserSlaveRequest;
|
||||||
|
|
||||||
|
+/* Structures carried over the slave channel back to QEMU */
|
||||||
|
+#define VHOST_USER_FS_SLAVE_ENTRIES 8
|
||||||
|
+
|
||||||
|
+/* For the flags field of VhostUserFSSlaveMsg */
|
||||||
|
+#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0)
|
||||||
|
+#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1)
|
||||||
|
+
|
||||||
|
+typedef struct {
|
||||||
|
+ /* Offsets within the file being mapped */
|
||||||
|
+ uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+ /* Offsets within the cache */
|
||||||
|
+ uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+ /* Lengths of sections */
|
||||||
|
+ uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+ /* Flags, from VHOST_USER_FS_FLAG_* */
|
||||||
|
+ uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+} VhostUserFSSlaveMsg;
|
||||||
|
+
|
||||||
|
typedef struct VhostUserMemoryRegion {
|
||||||
|
uint64_t guest_phys_addr;
|
||||||
|
uint64_t memory_size;
|
||||||
|
@@ -197,6 +215,7 @@ typedef struct VhostUserMsg {
|
||||||
|
VhostUserConfig config;
|
||||||
|
VhostUserVringArea area;
|
||||||
|
VhostUserInflight inflight;
|
||||||
|
+ VhostUserFSSlaveMsg fs;
|
||||||
|
} payload;
|
||||||
|
|
||||||
|
int fds[VHOST_MEMORY_BASELINE_NREGIONS];
|
||||||
|
@@ -693,4 +712,16 @@ void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
|
||||||
|
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
|
||||||
|
unsigned int out_bytes);
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * vu_fs_cache_request: Send a slave message for an fs client
|
||||||
|
+ * @dev: a VuDev context
|
||||||
|
+ * @req: The request type (map, unmap, sync)
|
||||||
|
+ * @fd: an fd (only required for map, else must be -1)
|
||||||
|
+ * @fsm: The body of the message
|
||||||
|
+ *
|
||||||
|
+ * Returns: true if the reply was 0
|
||||||
|
+ */
|
||||||
|
+bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
+ VhostUserFSSlaveMsg *fsm);
|
||||||
|
+
|
||||||
|
#endif /* LIBVHOST_USER_H */
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,65 @@
|
|||||||
|
From 71c89288b97c92ecb3a67ca8aa73619719dcfe9e Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Fri, 27 Jul 2018 12:38:03 +0100
|
||||||
|
Subject: [PATCH 04/29] DAX: virtio: Add shared memory capability
|
||||||
|
|
||||||
|
Define a new capability type 'VIRTIO_PCI_CAP_SHARED_MEMORY_CFG'
|
||||||
|
and the data structure 'virtio_pci_cap64' to go with it.
|
||||||
|
They allow defining shared memory regions with sizes and offsets
|
||||||
|
of 2^32 and more.
|
||||||
|
Multiple instances of the capability are allowed and distinguished
|
||||||
|
by the 'id' field in the base capability.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
hw/virtio/virtio-pci.c | 20 ++++++++++++++++++++
|
||||||
|
hw/virtio/virtio-pci.h | 4 ++++
|
||||||
|
2 files changed, 24 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
|
||||||
|
index f863f69ede..f17ea5a6e8 100644
|
||||||
|
--- a/hw/virtio/virtio-pci.c
|
||||||
|
+++ b/hw/virtio/virtio-pci.c
|
||||||
|
@@ -1136,6 +1136,26 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
+int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
|
||||||
|
+ uint8_t bar, uint64_t offset, uint64_t length,
|
||||||
|
+ uint8_t id)
|
||||||
|
+{
|
||||||
|
+ struct virtio_pci_cap64 cap = {
|
||||||
|
+ .cap.cap_len = sizeof cap,
|
||||||
|
+ .cap.cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG,
|
||||||
|
+ };
|
||||||
|
+ uint32_t mask32 = ~0;
|
||||||
|
+
|
||||||
|
+ cap.cap.bar = bar;
|
||||||
|
+ cap.cap.id = id;
|
||||||
|
+ cap.cap.length = cpu_to_le32(length & mask32);
|
||||||
|
+ cap.length_hi = cpu_to_le32((length >> 32) & mask32);
|
||||||
|
+ cap.cap.offset = cpu_to_le32(offset & mask32);
|
||||||
|
+ cap.offset_hi = cpu_to_le32((offset >> 32) & mask32);
|
||||||
|
+
|
||||||
|
+ return virtio_pci_add_mem_cap(proxy, &cap.cap);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
|
||||||
|
unsigned size)
|
||||||
|
{
|
||||||
|
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
|
||||||
|
index d7d5d403a9..31ca339099 100644
|
||||||
|
--- a/hw/virtio/virtio-pci.h
|
||||||
|
+++ b/hw/virtio/virtio-pci.h
|
||||||
|
@@ -247,4 +247,8 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
|
||||||
|
*/
|
||||||
|
unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
|
||||||
|
|
||||||
|
+int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
|
||||||
|
+ uint8_t bar, uint64_t offset, uint64_t length,
|
||||||
|
+ uint8_t id);
|
||||||
|
+
|
||||||
|
#endif
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,171 @@
|
|||||||
|
From 3996e9086ddd591494f9cb7f0eb7048a1b52200c Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Tue, 3 Jul 2018 16:33:52 +0100
|
||||||
|
Subject: [PATCH 05/29] DAX: virtio-fs: Add cache BAR
|
||||||
|
|
||||||
|
Add a cache BAR into which files will be directly mapped.
|
||||||
|
The size can be set with the cache-size= property, e.g.
|
||||||
|
-device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=16G
|
||||||
|
|
||||||
|
The default is no cache.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
with PPC fixes by:
|
||||||
|
Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
|
||||||
|
---
|
||||||
|
hw/virtio/vhost-user-fs-pci.c | 23 ++++++++++++++++
|
||||||
|
hw/virtio/vhost-user-fs.c | 32 ++++++++++++++++++++++
|
||||||
|
include/hw/virtio/vhost-user-fs.h | 2 ++
|
||||||
|
include/standard-headers/linux/virtio_fs.h | 2 ++
|
||||||
|
4 files changed, 59 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c
|
||||||
|
index 8bb389bd28..19aaa8d722 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs-pci.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs-pci.c
|
||||||
|
@@ -16,10 +16,12 @@
|
||||||
|
#include "hw/virtio/vhost-user-fs.h"
|
||||||
|
#include "virtio-pci.h"
|
||||||
|
#include "qom/object.h"
|
||||||
|
+#include "standard-headers/linux/virtio_fs.h"
|
||||||
|
|
||||||
|
struct VHostUserFSPCI {
|
||||||
|
VirtIOPCIProxy parent_obj;
|
||||||
|
VHostUserFS vdev;
|
||||||
|
+ MemoryRegion cachebar;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct VHostUserFSPCI VHostUserFSPCI;
|
||||||
|
@@ -39,6 +41,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
|
||||||
|
{
|
||||||
|
VHostUserFSPCI *dev = VHOST_USER_FS_PCI(vpci_dev);
|
||||||
|
DeviceState *vdev = DEVICE(&dev->vdev);
|
||||||
|
+ uint64_t cachesize;
|
||||||
|
|
||||||
|
if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
|
||||||
|
/* Also reserve config change and hiprio queue vectors */
|
||||||
|
@@ -46,6 +49,26 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
|
||||||
|
}
|
||||||
|
|
||||||
|
qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
|
||||||
|
+ cachesize = dev->vdev.conf.cache_size;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * The bar starts with the data/DAX cache
|
||||||
|
+ * Others will be added later.
|
||||||
|
+ */
|
||||||
|
+ memory_region_init(&dev->cachebar, OBJECT(vpci_dev),
|
||||||
|
+ "vhost-fs-pci-cachebar", cachesize);
|
||||||
|
+ if (cachesize) {
|
||||||
|
+ memory_region_add_subregion(&dev->cachebar, 0, &dev->vdev.cache);
|
||||||
|
+ virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, 0, cachesize,
|
||||||
|
+ VIRTIO_FS_SHMCAP_ID_CACHE);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* After 'realized' so the memory region exists */
|
||||||
|
+ pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR,
|
||||||
|
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
|
||||||
|
+ PCI_BASE_ADDRESS_MEM_PREFETCH |
|
||||||
|
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
|
||||||
|
+ &dev->cachebar);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vhost_user_fs_pci_class_init(ObjectClass *klass, void *data)
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index ed036ad9c1..d111bf2af3 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -23,6 +23,16 @@
|
||||||
|
#include "hw/virtio/vhost-user-fs.h"
|
||||||
|
#include "monitor/monitor.h"
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * The powerpc kernel code expects the memory to be accessible during
|
||||||
|
+ * addition/removal.
|
||||||
|
+ */
|
||||||
|
+#if defined(TARGET_PPC64) && defined(CONFIG_LINUX)
|
||||||
|
+#define DAX_WINDOW_PROT PROT_READ
|
||||||
|
+#else
|
||||||
|
+#define DAX_WINDOW_PROT PROT_NONE
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(vdev);
|
||||||
|
@@ -162,6 +172,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
{
|
||||||
|
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(dev);
|
||||||
|
+ void *cache_ptr;
|
||||||
|
unsigned int i;
|
||||||
|
size_t len;
|
||||||
|
int ret;
|
||||||
|
@@ -201,6 +212,26 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
VIRTQUEUE_MAX_SIZE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
+ if (fs->conf.cache_size &&
|
||||||
|
+ (!is_power_of_2(fs->conf.cache_size) ||
|
||||||
|
+ fs->conf.cache_size < sysconf(_SC_PAGESIZE))) {
|
||||||
|
+ error_setg(errp, "cache-size property must be a power of 2 "
|
||||||
|
+ "no smaller than the page size");
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ if (fs->conf.cache_size) {
|
||||||
|
+ /* Anonymous, private memory is not counted as overcommit */
|
||||||
|
+ cache_ptr = mmap(NULL, fs->conf.cache_size, DAX_WINDOW_PROT,
|
||||||
|
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||||
|
+ if (cache_ptr == MAP_FAILED) {
|
||||||
|
+ error_setg(errp, "Unable to mmap blank cache");
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ memory_region_init_ram_ptr(&fs->cache, OBJECT(vdev),
|
||||||
|
+ "virtio-fs-cache",
|
||||||
|
+ fs->conf.cache_size, cache_ptr);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) {
|
||||||
|
return;
|
||||||
|
@@ -276,6 +307,7 @@ static Property vuf_properties[] = {
|
||||||
|
DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
|
||||||
|
conf.num_request_queues, 1),
|
||||||
|
DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
|
||||||
|
+ DEFINE_PROP_SIZE("cache-size", VHostUserFS, conf.cache_size, 0),
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
index 6985752771..df6bf2a926 100644
|
||||||
|
--- a/include/hw/virtio/vhost-user-fs.h
|
||||||
|
+++ b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
@@ -28,6 +28,7 @@ typedef struct {
|
||||||
|
char *tag;
|
||||||
|
uint16_t num_request_queues;
|
||||||
|
uint16_t queue_size;
|
||||||
|
+ uint64_t cache_size;
|
||||||
|
} VHostUserFSConf;
|
||||||
|
|
||||||
|
struct VHostUserFS {
|
||||||
|
@@ -41,6 +42,7 @@ struct VHostUserFS {
|
||||||
|
VirtQueue *hiprio_vq;
|
||||||
|
|
||||||
|
/*< public >*/
|
||||||
|
+ MemoryRegion cache;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _QEMU_VHOST_USER_FS_H */
|
||||||
|
diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h
|
||||||
|
index a32fe8a64c..808aa3a402 100644
|
||||||
|
--- a/include/standard-headers/linux/virtio_fs.h
|
||||||
|
+++ b/include/standard-headers/linux/virtio_fs.h
|
||||||
|
@@ -19,4 +19,6 @@ struct virtio_fs_config {
|
||||||
|
/* For the id field in virtio_pci_shm_cap */
|
||||||
|
#define VIRTIO_FS_SHMCAP_ID_CACHE 0
|
||||||
|
|
||||||
|
+#define VIRTIO_FS_PCI_CACHE_BAR 2
|
||||||
|
+
|
||||||
|
#endif /* _LINUX_VIRTIO_FS_H */
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,191 @@
|
|||||||
|
From 27ccc5e4aecbffd590199bae897a8359889fd54d Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Wed, 4 Jul 2018 18:51:42 +0100
|
||||||
|
Subject: [PATCH 06/29] DAX: virtio-fs: Add vhost-user slave commands for
|
||||||
|
mapping
|
||||||
|
|
||||||
|
The daemon may request that fd's be mapped into the virtio-fs cache
|
||||||
|
visible to the guest.
|
||||||
|
These mappings are triggered by commands sent over the slave fd
|
||||||
|
from the daemon.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
docs/interop/vhost-user.rst | 23 ++++++++++++++++++++++
|
||||||
|
hw/virtio/vhost-user-fs.c | 19 ++++++++++++++++++
|
||||||
|
hw/virtio/vhost-user.c | 18 +++++++++++++++++
|
||||||
|
include/hw/virtio/vhost-user-fs.h | 24 +++++++++++++++++++++++
|
||||||
|
subprojects/libvhost-user/libvhost-user.h | 3 +++
|
||||||
|
5 files changed, 87 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
|
||||||
|
index d6085f7045..056f94c6fb 100644
|
||||||
|
--- a/docs/interop/vhost-user.rst
|
||||||
|
+++ b/docs/interop/vhost-user.rst
|
||||||
|
@@ -1432,6 +1432,29 @@ Slave message types
|
||||||
|
|
||||||
|
The state.num field is currently reserved and must be set to 0.
|
||||||
|
|
||||||
|
+``VHOST_USER_SLAVE_FS_MAP``
|
||||||
|
+ :id: 6
|
||||||
|
+ :equivalent ioctl: N/A
|
||||||
|
+ :slave payload: fd + n * (offset + address + len)
|
||||||
|
+ :master payload: N/A
|
||||||
|
+
|
||||||
|
+ Requests that QEMU mmap the given fd into the virtio-fs cache;
|
||||||
|
+ multiple chunks can be mapped in one command.
|
||||||
|
+ A reply is generated indicating whether mapping succeeded.
|
||||||
|
+
|
||||||
|
+``VHOST_USER_SLAVE_FS_UNMAP``
|
||||||
|
+ :id: 7
|
||||||
|
+ :equivalent ioctl: N/A
|
||||||
|
+ :slave payload: n * (address + len)
|
||||||
|
+ :master payload: N/A
|
||||||
|
+
|
||||||
|
+ Requests that QEMU un-mmap the given range in the virtio-fs cache;
|
||||||
|
+ multiple chunks can be unmapped in one command.
|
||||||
|
+ A reply is generated indicating whether unmapping succeeded.
|
||||||
|
+
|
||||||
|
+``VHOST_USER_SLAVE_FS_SYNC``
|
||||||
|
+ [Semantic details TBD]
|
||||||
|
+
|
||||||
|
.. _reply_ack:
|
||||||
|
|
||||||
|
VHOST_USER_PROTOCOL_F_REPLY_ACK
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index d111bf2af3..9c35fdbeab 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -33,6 +33,25 @@
|
||||||
|
#define DAX_WINDOW_PROT PROT_NONE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
+ int fd)
|
||||||
|
+{
|
||||||
|
+ /* TODO */
|
||||||
|
+ return -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
+{
|
||||||
|
+ /* TODO */
|
||||||
|
+ return -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
+{
|
||||||
|
+ /* TODO */
|
||||||
|
+ return -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(vdev);
|
||||||
|
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||||
|
index 2fdd5daf74..757dee0d1e 100644
|
||||||
|
--- a/hw/virtio/vhost-user.c
|
||||||
|
+++ b/hw/virtio/vhost-user.c
|
||||||
|
@@ -12,6 +12,7 @@
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "hw/virtio/vhost.h"
|
||||||
|
#include "hw/virtio/vhost-user.h"
|
||||||
|
+#include "hw/virtio/vhost-user-fs.h"
|
||||||
|
#include "hw/virtio/vhost-backend.h"
|
||||||
|
#include "hw/virtio/virtio.h"
|
||||||
|
#include "hw/virtio/virtio-net.h"
|
||||||
|
@@ -132,6 +133,11 @@ typedef enum VhostUserSlaveRequest {
|
||||||
|
VHOST_USER_SLAVE_IOTLB_MSG = 1,
|
||||||
|
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
|
||||||
|
VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
|
||||||
|
+ VHOST_USER_SLAVE_VRING_CALL = 4,
|
||||||
|
+ VHOST_USER_SLAVE_VRING_ERR = 5,
|
||||||
|
+ VHOST_USER_SLAVE_FS_MAP = 6,
|
||||||
|
+ VHOST_USER_SLAVE_FS_UNMAP = 7,
|
||||||
|
+ VHOST_USER_SLAVE_FS_SYNC = 8,
|
||||||
|
VHOST_USER_SLAVE_MAX
|
||||||
|
} VhostUserSlaveRequest;
|
||||||
|
|
||||||
|
@@ -218,6 +224,7 @@ typedef union {
|
||||||
|
VhostUserCryptoSession session;
|
||||||
|
VhostUserVringArea area;
|
||||||
|
VhostUserInflight inflight;
|
||||||
|
+ VhostUserFSSlaveMsg fs;
|
||||||
|
} VhostUserPayload;
|
||||||
|
|
||||||
|
typedef struct VhostUserMsg {
|
||||||
|
@@ -1470,6 +1477,17 @@ static void slave_read(void *opaque)
|
||||||
|
ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
|
||||||
|
fd[0]);
|
||||||
|
break;
|
||||||
|
+#ifdef CONFIG_VHOST_USER_FS
|
||||||
|
+ case VHOST_USER_SLAVE_FS_MAP:
|
||||||
|
+ ret = vhost_user_fs_slave_map(dev, &payload.fs, fd[0]);
|
||||||
|
+ break;
|
||||||
|
+ case VHOST_USER_SLAVE_FS_UNMAP:
|
||||||
|
+ ret = vhost_user_fs_slave_unmap(dev, &payload.fs);
|
||||||
|
+ break;
|
||||||
|
+ case VHOST_USER_SLAVE_FS_SYNC:
|
||||||
|
+ ret = vhost_user_fs_slave_sync(dev, &payload.fs);
|
||||||
|
+ break;
|
||||||
|
+#endif
|
||||||
|
default:
|
||||||
|
error_report("Received unexpected msg type: %d.", hdr.request);
|
||||||
|
ret = -EINVAL;
|
||||||
|
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
index df6bf2a926..69cc6340ed 100644
|
||||||
|
--- a/include/hw/virtio/vhost-user-fs.h
|
||||||
|
+++ b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
@@ -23,6 +23,24 @@
|
||||||
|
#define TYPE_VHOST_USER_FS "vhost-user-fs-device"
|
||||||
|
OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS)
|
||||||
|
|
||||||
|
+/* Structures carried over the slave channel back to QEMU */
|
||||||
|
+#define VHOST_USER_FS_SLAVE_ENTRIES 8
|
||||||
|
+
|
||||||
|
+/* For the flags field of VhostUserFSSlaveMsg */
|
||||||
|
+#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0)
|
||||||
|
+#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1)
|
||||||
|
+
|
||||||
|
+typedef struct {
|
||||||
|
+ /* Offsets within the file being mapped */
|
||||||
|
+ uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+ /* Offsets within the cache */
|
||||||
|
+ uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+ /* Lengths of sections */
|
||||||
|
+ uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+ /* Flags, from VHOST_USER_FS_FLAG_* */
|
||||||
|
+ uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES];
|
||||||
|
+} VhostUserFSSlaveMsg;
|
||||||
|
+
|
||||||
|
typedef struct {
|
||||||
|
CharBackend chardev;
|
||||||
|
char *tag;
|
||||||
|
@@ -45,4 +63,10 @@ struct VHostUserFS {
|
||||||
|
MemoryRegion cache;
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* Callbacks from the vhost-user code for slave commands */
|
||||||
|
+int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
+ int fd);
|
||||||
|
+int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm);
|
||||||
|
+int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm);
|
||||||
|
+
|
||||||
|
#endif /* _QEMU_VHOST_USER_FS_H */
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
index f3b0998eea..c63a590069 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
@@ -119,6 +119,9 @@ typedef enum VhostUserSlaveRequest {
|
||||||
|
VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
|
||||||
|
VHOST_USER_SLAVE_VRING_CALL = 4,
|
||||||
|
VHOST_USER_SLAVE_VRING_ERR = 5,
|
||||||
|
+ VHOST_USER_SLAVE_FS_MAP = 6,
|
||||||
|
+ VHOST_USER_SLAVE_FS_UNMAP = 7,
|
||||||
|
+ VHOST_USER_SLAVE_FS_SYNC = 8,
|
||||||
|
VHOST_USER_SLAVE_MAX
|
||||||
|
} VhostUserSlaveRequest;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,98 @@
|
|||||||
|
From 3de89ce9fb5eda46f7cefd70e9090cb7cd7ec803 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yang Zhong <yang.zhong@intel.com>
|
||||||
|
Date: Wed, 28 Mar 2018 20:14:53 +0800
|
||||||
|
Subject: [PATCH 1/2] 9p: removing coroutines of 9p to increase the I/O
|
||||||
|
performance
|
||||||
|
|
||||||
|
This is a quick workaround; it needs to be fixed properly.
|
||||||
|
|
||||||
|
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
|
||||||
|
---
|
||||||
|
hw/9pfs/9p.c | 12 +++++-------
|
||||||
|
hw/9pfs/9p.h | 6 +++---
|
||||||
|
hw/9pfs/coth.h | 3 +++
|
||||||
|
3 files changed, 11 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
|
||||||
|
index 9e046f7acb..11c8ee08d9 100644
|
||||||
|
--- a/hw/9pfs/9p.c
|
||||||
|
+++ b/hw/9pfs/9p.c
|
||||||
|
@@ -1082,10 +1082,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
|
||||||
|
out_notify:
|
||||||
|
pdu->s->transport->push_and_notify(pdu);
|
||||||
|
|
||||||
|
- /* Now wakeup anybody waiting in flush for this request */
|
||||||
|
- if (!qemu_co_queue_next(&pdu->complete)) {
|
||||||
|
- pdu_free(pdu);
|
||||||
|
- }
|
||||||
|
+ pdu_free(pdu);
|
||||||
|
}
|
||||||
|
|
||||||
|
static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
|
||||||
|
@@ -3997,7 +3994,7 @@ static inline bool is_read_only_op(V9fsPDU *pdu)
|
||||||
|
|
||||||
|
void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
|
||||||
|
{
|
||||||
|
- Coroutine *co;
|
||||||
|
+// Coroutine *co;
|
||||||
|
CoroutineEntry *handler;
|
||||||
|
V9fsState *s = pdu->s;
|
||||||
|
|
||||||
|
@@ -4015,8 +4012,9 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
|
||||||
|
}
|
||||||
|
|
||||||
|
qemu_co_queue_init(&pdu->complete);
|
||||||
|
- co = qemu_coroutine_create(handler, pdu);
|
||||||
|
- qemu_coroutine_enter(co);
|
||||||
|
+ handler(pdu);
|
||||||
|
+ //co = qemu_coroutine_create(handler, pdu);
|
||||||
|
+ //qemu_coroutine_enter(co);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns 0 on success, 1 on failure. */
|
||||||
|
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
|
||||||
|
index b8f72a3bd9..d16bf9d05e 100644
|
||||||
|
--- a/hw/9pfs/9p.h
|
||||||
|
+++ b/hw/9pfs/9p.h
|
||||||
|
@@ -391,21 +391,21 @@ extern int total_open_fd;
|
||||||
|
static inline void v9fs_path_write_lock(V9fsState *s)
|
||||||
|
{
|
||||||
|
if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
|
||||||
|
- qemu_co_rwlock_wrlock(&s->rename_lock);
|
||||||
|
+ // qemu_co_rwlock_wrlock(&s->rename_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void v9fs_path_read_lock(V9fsState *s)
|
||||||
|
{
|
||||||
|
if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
|
||||||
|
- qemu_co_rwlock_rdlock(&s->rename_lock);
|
||||||
|
+ // qemu_co_rwlock_rdlock(&s->rename_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void v9fs_path_unlock(V9fsState *s)
|
||||||
|
{
|
||||||
|
if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
|
||||||
|
- qemu_co_rwlock_unlock(&s->rename_lock);
|
||||||
|
+ // qemu_co_rwlock_unlock(&s->rename_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
|
||||||
|
index c2cdc7a9ea..0fe971d1f5 100644
|
||||||
|
--- a/hw/9pfs/coth.h
|
||||||
|
+++ b/hw/9pfs/coth.h
|
||||||
|
@@ -46,6 +46,9 @@
|
||||||
|
qemu_coroutine_yield(); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
+#undef v9fs_co_run_in_worker
|
||||||
|
+#define v9fs_co_run_in_worker(code_block) do {code_block} while(0);
|
||||||
|
+
|
||||||
|
void co_run_in_worker_bh(void *);
|
||||||
|
int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
|
||||||
|
int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **);
|
||||||
|
--
|
||||||
|
2.21.0
|
||||||
|
|
@ -0,0 +1,196 @@
|
|||||||
|
From a0d09868a25b9b15b8ef49402b035597ef889f85 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Wed, 4 Jul 2018 20:01:51 +0100
|
||||||
|
Subject: [PATCH 07/29] DAX: virtio-fs: Fill in slave commands for mapping
|
||||||
|
|
||||||
|
Fill in definitions for map, unmap and sync commands.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
with fix by misono.tomohiro@fujitsu.com
|
||||||
|
---
|
||||||
|
hw/virtio/vhost-user-fs.c | 161 ++++++++++++++++++++++++++++++++++++--
|
||||||
|
1 file changed, 155 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index 9c35fdbeab..98cec993f7 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -36,20 +36,169 @@
|
||||||
|
int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
int fd)
|
||||||
|
{
|
||||||
|
- /* TODO */
|
||||||
|
- return -1;
|
||||||
|
+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
+ if (!fs) {
|
||||||
|
+ /* Shouldn't happen - but seen on error path */
|
||||||
|
+ fprintf(stderr, "%s: Bad fs ptr\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ size_t cache_size = fs->conf.cache_size;
|
||||||
|
+ if (!cache_size) {
|
||||||
|
+ fprintf(stderr, "%s: map when DAX cache not present\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ void *cache_host = memory_region_get_ram_ptr(&fs->cache);
|
||||||
|
+
|
||||||
|
+ unsigned int i;
|
||||||
|
+ int res = 0;
|
||||||
|
+
|
||||||
|
+ if (fd < 0) {
|
||||||
|
+ fprintf(stderr, "%s: Bad fd for map\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) {
|
||||||
|
+ if (sm->len[i] == 0) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] ||
|
||||||
|
+ (sm->c_offset[i] + sm->len[i]) > cache_size) {
|
||||||
|
+ fprintf(stderr, "%s: Bad offset/len for map [%d] %"
|
||||||
|
+ PRIx64 "+%" PRIx64 "\n", __func__,
|
||||||
|
+ i, sm->c_offset[i], sm->len[i]);
|
||||||
|
+ res = -1;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (mmap(cache_host + sm->c_offset[i], sm->len[i],
|
||||||
|
+ ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_R) ? PROT_READ : 0) |
|
||||||
|
+ ((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W) ? PROT_WRITE : 0),
|
||||||
|
+ MAP_SHARED | MAP_FIXED,
|
||||||
|
+ fd, sm->fd_offset[i]) != (cache_host + sm->c_offset[i])) {
|
||||||
|
+ fprintf(stderr, "%s: map failed err %d [%d] %"
|
||||||
|
+ PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__,
|
||||||
|
+ errno, i, sm->c_offset[i], sm->len[i],
|
||||||
|
+ sm->fd_offset[i]);
|
||||||
|
+ res = -1;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (res) {
|
||||||
|
+ /* Something went wrong, unmap them all */
|
||||||
|
+ vhost_user_fs_slave_unmap(dev, sm);
|
||||||
|
+ }
|
||||||
|
+ return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
{
|
||||||
|
- /* TODO */
|
||||||
|
- return -1;
|
||||||
|
+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
+ if (!fs) {
|
||||||
|
+ /* Shouldn't happen - but seen on error path */
|
||||||
|
+ fprintf(stderr, "%s: Bad fs ptr\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ size_t cache_size = fs->conf.cache_size;
|
||||||
|
+ if (!cache_size) {
|
||||||
|
+ /*
|
||||||
|
+ * Since dax cache is disabled, there should be no unmap request.
|
||||||
|
+ * However, we still receive a whole-range unmap request during umount
|
||||||
|
+ * for cleanup. Ignore it.
|
||||||
|
+ */
|
||||||
|
+ if (sm->len[0] == ~(uint64_t)0) {
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fprintf(stderr, "%s: unmap when DAX cache not present\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ void *cache_host = memory_region_get_ram_ptr(&fs->cache);
|
||||||
|
+
|
||||||
|
+ unsigned int i;
|
||||||
|
+ int res = 0;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Note even if one unmap fails we try the rest, since the effect
|
||||||
|
+ * is to clean up as much as possible.
|
||||||
|
+ */
|
||||||
|
+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) {
|
||||||
|
+ void *ptr;
|
||||||
|
+ if (sm->len[i] == 0) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (sm->len[i] == ~(uint64_t)0) {
|
||||||
|
+ /* Special case meaning the whole arena */
|
||||||
|
+ sm->len[i] = cache_size;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] ||
|
||||||
|
+ (sm->c_offset[i] + sm->len[i]) > cache_size) {
|
||||||
|
+ fprintf(stderr, "%s: Bad offset/len for unmap [%d] %"
|
||||||
|
+ PRIx64 "+%" PRIx64 "\n", __func__,
|
||||||
|
+ i, sm->c_offset[i], sm->len[i]);
|
||||||
|
+ res = -1;
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ptr = mmap(cache_host + sm->c_offset[i], sm->len[i], DAX_WINDOW_PROT,
|
||||||
|
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
||||||
|
+ if (ptr != (cache_host + sm->c_offset[i])) {
|
||||||
|
+ fprintf(stderr, "%s: mmap failed (%s) [%d] %"
|
||||||
|
+ PRIx64 "+%" PRIx64 " from %" PRIx64 " res: %p\n",
|
||||||
|
+ __func__,
|
||||||
|
+ strerror(errno),
|
||||||
|
+ i, sm->c_offset[i], sm->len[i],
|
||||||
|
+ sm->fd_offset[i], ptr);
|
||||||
|
+ res = -1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
{
|
||||||
|
- /* TODO */
|
||||||
|
- return -1;
|
||||||
|
+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
+ size_t cache_size = fs->conf.cache_size;
|
||||||
|
+ if (!cache_size) {
|
||||||
|
+ fprintf(stderr, "%s: sync when DAX cache not present\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ void *cache_host = memory_region_get_ram_ptr(&fs->cache);
|
||||||
|
+
|
||||||
|
+ unsigned int i;
|
||||||
|
+ int res = 0;
|
||||||
|
+
|
||||||
|
+ /* Note even if one sync fails we try the rest */
|
||||||
|
+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) {
|
||||||
|
+ if (sm->len[i] == 0) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if ((sm->c_offset[i] + sm->len[i]) < sm->len[i] ||
|
||||||
|
+ (sm->c_offset[i] + sm->len[i]) > cache_size) {
|
||||||
|
+ fprintf(stderr, "%s: Bad offset/len for sync [%d] %"
|
||||||
|
+ PRIx64 "+%" PRIx64 "\n", __func__,
|
||||||
|
+ i, sm->c_offset[i], sm->len[i]);
|
||||||
|
+ res = -1;
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (msync(cache_host + sm->c_offset[i], sm->len[i],
|
||||||
|
+ MS_SYNC /* ?? */)) {
|
||||||
|
+ fprintf(stderr, "%s: msync failed (%s) [%d] %"
|
||||||
|
+ PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__,
|
||||||
|
+ strerror(errno),
|
||||||
|
+ i, sm->c_offset[i], sm->len[i],
|
||||||
|
+ sm->fd_offset[i]);
|
||||||
|
+ res = -1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,99 @@
|
|||||||
|
From b341b9541023b0a9f0a315ef24e81522b273e552 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Thu, 5 Jul 2018 18:20:34 +0100
|
||||||
|
Subject: [PATCH 08/29] DAX: virtiofsd Add cache accessor functions
|
||||||
|
|
||||||
|
Add low level functions that the clients can use to map/unmap/sync cache
|
||||||
|
areas.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 31 +++++++++++++++++++++++++++++++
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 27 +++++++++++++++++++++++++++
|
||||||
|
2 files changed, 58 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index 0e10a14bc9..b0d111bcb2 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -29,6 +29,8 @@
|
||||||
|
#include <sys/uio.h>
|
||||||
|
#include <utime.h>
|
||||||
|
|
||||||
|
+#include "subprojects/libvhost-user/libvhost-user.h"
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Miscellaneous definitions
|
||||||
|
*/
|
||||||
|
@@ -1970,4 +1972,33 @@ void fuse_session_process_buf(struct fuse_session *se,
|
||||||
|
*/
|
||||||
|
int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf);
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * For use with virtio-fs; request an fd be mapped into the cache
|
||||||
|
+ *
|
||||||
|
+ * @param req The request that triggered this action
|
||||||
|
+ * @param msg A set of mapping requests
|
||||||
|
+ * @param fd The fd to map
|
||||||
|
+ * @return Zero on success
|
||||||
|
+ */
|
||||||
|
+int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd);
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * For use with virtio-fs; request unmapping of part of the cache
|
||||||
|
+ *
|
||||||
|
+ * @param se The session this request is on
|
||||||
|
+ * @param msg A set of unmapping requests
|
||||||
|
+ * @return Zero on success
|
||||||
|
+ */
|
||||||
|
+int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg);
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * For use with virtio-fs; request synchronisation of part of the cache
|
||||||
|
+ * [Semantics TBD]
|
||||||
|
+ *
|
||||||
|
+ * @param req The request that triggered this action
|
||||||
|
+ * @param msg A set of syncing requests
|
||||||
|
+ * @return Zero on success
|
||||||
|
+ */
|
||||||
|
+int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg);
|
||||||
|
+
|
||||||
|
#endif /* FUSE_LOWLEVEL_H_ */
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index bd19358437..24d9323665 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -1044,3 +1044,30 @@ void virtio_session_close(struct fuse_session *se)
|
||||||
|
free(se->virtio_dev);
|
||||||
|
se->virtio_dev = NULL;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd)
|
||||||
|
+{
|
||||||
|
+ if (!req->se->virtio_dev) {
|
||||||
|
+ return -ENODEV;
|
||||||
|
+ }
|
||||||
|
+ return !vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
+ VHOST_USER_SLAVE_FS_MAP, fd, msg);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg)
|
||||||
|
+{
|
||||||
|
+ if (!se->virtio_dev) {
|
||||||
|
+ return -ENODEV;
|
||||||
|
+ }
|
||||||
|
+ return !vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP,
|
||||||
|
+ -1, msg);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg)
|
||||||
|
+{
|
||||||
|
+ if (!req->se->virtio_dev) {
|
||||||
|
+ return -ENODEV;
|
||||||
|
+ }
|
||||||
|
+ return !vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
+ VHOST_USER_SLAVE_FS_SYNC, -1, msg);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,152 @@
|
|||||||
|
From c3273cefbec6f5637189ad1cb9a8b7722cc01294 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Fri, 6 Jul 2018 18:03:49 +0100
|
||||||
|
Subject: [PATCH 09/29] DAX: virtiofsd: Add setup/remove mappings fuse commands
|
||||||
|
|
||||||
|
Add commands so that the guest kernel can ask the daemon to map file
|
||||||
|
sections into a guest kernel visible cache.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
|
||||||
|
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.c | 67 +++++++++++++++++++++++++++++++++
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 23 ++++++++++-
|
||||||
|
2 files changed, 89 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
index e94b71110b..1c3790130a 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
@@ -1868,6 +1868,71 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void do_setupmapping(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
+ struct fuse_mbuf_iter *iter)
|
||||||
|
+{
|
||||||
|
+ struct fuse_setupmapping_in *arg;
|
||||||
|
+ struct fuse_file_info fi;
|
||||||
|
+
|
||||||
|
+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
|
||||||
|
+ if (!arg) {
|
||||||
|
+ fuse_reply_err(req, EINVAL);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ memset(&fi, 0, sizeof(fi));
|
||||||
|
+ fi.fh = arg->fh;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * TODO: Need to come up with a better definition of flags here; it can't
|
||||||
|
+ * be the kernel view of the flags, since that's abstracted from the client
|
||||||
|
+ * similarly, it's not the vhost-user set
|
||||||
|
+ * for now just use O_ flags
|
||||||
|
+ */
|
||||||
|
+ uint64_t genflags;
|
||||||
|
+
|
||||||
|
+ genflags = O_RDONLY;
|
||||||
|
+ if (arg->flags & FUSE_SETUPMAPPING_FLAG_WRITE) {
|
||||||
|
+ genflags = O_RDWR;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (req->se->op.setupmapping) {
|
||||||
|
+ req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len,
|
||||||
|
+ arg->moffset, genflags, &fi);
|
||||||
|
+ } else {
|
||||||
|
+ fuse_reply_err(req, ENOSYS);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void do_removemapping(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
+ struct fuse_mbuf_iter *iter)
|
||||||
|
+{
|
||||||
|
+ struct fuse_removemapping_in *arg;
|
||||||
|
+ struct fuse_removemapping_one *one;
|
||||||
|
+
|
||||||
|
+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
|
||||||
|
+ if (!arg) {
|
||||||
|
+ fuse_reply_err(req, EINVAL);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ one = fuse_mbuf_iter_advance(iter, sizeof(*one));
|
||||||
|
+ if (!one) {
|
||||||
|
+ fuse_log(
|
||||||
|
+ FUSE_LOG_ERR,
|
||||||
|
+ "do_removemapping: invalid in, expected %d * %ld, has %ld - %ld\n",
|
||||||
|
+ arg->count, sizeof(*one), iter->size, iter->pos);
|
||||||
|
+ fuse_reply_err(req, EINVAL);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (req->se->op.removemapping) {
|
||||||
|
+ req->se->op.removemapping(req, req->se, nodeid, arg->count, one);
|
||||||
|
+ } else {
|
||||||
|
+ fuse_reply_err(req, ENOSYS);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void do_init(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
struct fuse_mbuf_iter *iter)
|
||||||
|
{
|
||||||
|
@@ -2258,6 +2323,8 @@ static struct {
|
||||||
|
[FUSE_RENAME2] = { do_rename2, "RENAME2" },
|
||||||
|
[FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
|
||||||
|
[FUSE_LSEEK] = { do_lseek, "LSEEK" },
|
||||||
|
+ [FUSE_SETUPMAPPING] = { do_setupmapping, "SETUPMAPPING" },
|
||||||
|
+ [FUSE_REMOVEMAPPING] = { do_removemapping, "REMOVEMAPPING" },
|
||||||
|
};
|
||||||
|
|
||||||
|
#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index b0d111bcb2..2851840cc2 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -24,6 +24,7 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "fuse_common.h"
|
||||||
|
+#include "standard-headers/linux/fuse.h"
|
||||||
|
|
||||||
|
#include <sys/statvfs.h>
|
||||||
|
#include <sys/uio.h>
|
||||||
|
@@ -1170,7 +1171,6 @@ struct fuse_lowlevel_ops {
|
||||||
|
*/
|
||||||
|
void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
|
||||||
|
struct fuse_file_info *fi);
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* Copy a range of data from one file to another
|
||||||
|
*
|
||||||
|
@@ -1226,6 +1226,27 @@ struct fuse_lowlevel_ops {
|
||||||
|
*/
|
||||||
|
void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
|
||||||
|
struct fuse_file_info *fi);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Map file sections into kernel visible cache
|
||||||
|
+ *
|
||||||
|
+ * Map a section of the file into address space visible to the kernel
|
||||||
|
+ * mounting the filesystem.
|
||||||
|
+ * TODO
|
||||||
|
+ */
|
||||||
|
+ void (*setupmapping)(fuse_req_t req, fuse_ino_t ino, uint64_t foffset,
|
||||||
|
+ uint64_t len, uint64_t moffset, uint64_t flags,
|
||||||
|
+ struct fuse_file_info *fi);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Unmap file sections in kernel visible cache
|
||||||
|
+ *
|
||||||
|
+ * Unmap sections previously mapped by setupmapping
|
||||||
|
+ * TODO
|
||||||
|
+ */
|
||||||
|
+ void (*removemapping)(fuse_req_t req, struct fuse_session *se,
|
||||||
|
+ fuse_ino_t ino, unsigned num,
|
||||||
|
+ struct fuse_removemapping_one *argp);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,50 @@
|
|||||||
|
From 7029506e6b23fc15f2b7c4a6a62aa3a0ee58fb02 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Fri, 6 Jul 2018 19:52:49 +0100
|
||||||
|
Subject: [PATCH 10/29] DAX: virtiofsd: Add setup/remove mapping handlers to
|
||||||
|
passthrough_ll
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 18 ++++++++++++++++++
|
||||||
|
1 file changed, 18 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index 5fb36d9407..784bdcff34 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2891,6 +2891,22 @@ static void lo_destroy(void *userdata)
|
||||||
|
pthread_mutex_unlock(&lo->mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset,
|
||||||
|
+ uint64_t len, uint64_t moffset, uint64_t flags,
|
||||||
|
+ struct fuse_file_info *fi)
|
||||||
|
+{
|
||||||
|
+ // TODO
|
||||||
|
+ fuse_reply_err(req, ENOSYS);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void lo_removemapping(fuse_req_t req, struct fuse_session *se,
|
||||||
|
+ fuse_ino_t ino, unsigned num,
|
||||||
|
+ struct fuse_removemapping_one *argp)
|
||||||
|
+{
|
||||||
|
+ // TODO
|
||||||
|
+ fuse_reply_err(req, ENOSYS);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static struct fuse_lowlevel_ops lo_oper = {
|
||||||
|
.init = lo_init,
|
||||||
|
.lookup = lo_lookup,
|
||||||
|
@@ -2932,6 +2948,8 @@ static struct fuse_lowlevel_ops lo_oper = {
|
||||||
|
#endif
|
||||||
|
.lseek = lo_lseek,
|
||||||
|
.destroy = lo_destroy,
|
||||||
|
+ .setupmapping = lo_setupmapping,
|
||||||
|
+ .removemapping = lo_removemapping,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Print vhost-user.json backend program capabilities */
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,53 @@
|
|||||||
|
From 15fb0e84e38c2681e855e69b58414ba831b399bf Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 9 Jul 2018 19:57:16 +0100
|
||||||
|
Subject: [PATCH 11/29] DAX: virtiofsd: Wire up passthrough_ll's
|
||||||
|
lo_setupmapping
|
||||||
|
|
||||||
|
Wire up passthrough_ll's setupmapping to allocate, send to virtio
|
||||||
|
and then reply OK.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 24 ++++++++++++++++++++++--
|
||||||
|
1 file changed, 22 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index 784bdcff34..b57cb4079e 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2895,8 +2895,28 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset,
|
||||||
|
uint64_t len, uint64_t moffset, uint64_t flags,
|
||||||
|
struct fuse_file_info *fi)
|
||||||
|
{
|
||||||
|
- // TODO
|
||||||
|
- fuse_reply_err(req, ENOSYS);
|
||||||
|
+ int ret = 0;
|
||||||
|
+ VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
+ uint64_t vhu_flags;
|
||||||
|
+ bool writable = flags & O_RDWR;
|
||||||
|
+
|
||||||
|
+ vhu_flags = VHOST_USER_FS_FLAG_MAP_R;
|
||||||
|
+ if (writable) {
|
||||||
|
+ vhu_flags |= VHOST_USER_FS_FLAG_MAP_W;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ msg.fd_offset[0] = foffset;
|
||||||
|
+ msg.len[0] = len;
|
||||||
|
+ msg.c_offset[0] = moffset;
|
||||||
|
+ msg.flags[0] = vhu_flags;
|
||||||
|
+
|
||||||
|
+ if (fuse_virtio_map(req, &msg, lo_fi_fd(req, fi))) {
|
||||||
|
+ fprintf(stderr, "%s: map over virtio failed (fd=%d)\n", __func__,
|
||||||
|
+ (int)fi->fh);
|
||||||
|
+ ret = EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fuse_reply_err(req, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void lo_removemapping(fuse_req_t req, struct fuse_session *se,
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,43 @@
|
|||||||
|
From 17cf13d652885b2c3a09fbbab1cb503f53c27d96 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Vivek Goyal <vgoyal@redhat.com>
|
||||||
|
Date: Mon, 13 Aug 2018 11:52:43 -0400
|
||||||
|
Subject: [PATCH 12/29] DAX: virtiofsd: Make lo_removemapping() work
|
||||||
|
|
||||||
|
Let the guest pass in the offset in the DAX window that a mapping is currently
|
||||||
|
mapped at and needs to be removed.
|
||||||
|
|
||||||
|
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 16 ++++++++++++++--
|
||||||
|
1 file changed, 14 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index b57cb4079e..056b395574 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2923,8 +2923,20 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se,
|
||||||
|
fuse_ino_t ino, unsigned num,
|
||||||
|
struct fuse_removemapping_one *argp)
|
||||||
|
{
|
||||||
|
- // TODO
|
||||||
|
- fuse_reply_err(req, ENOSYS);
|
||||||
|
+ VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
+ int ret = 0;
|
||||||
|
+
|
||||||
|
+ msg.len[0] = argp->len;
|
||||||
|
+ msg.c_offset[0] = argp->moffset;
|
||||||
|
+ if (fuse_virtio_unmap(se, &msg)) {
|
||||||
|
+ fprintf(stderr,
|
||||||
|
+ "%s: unmap over virtio failed "
|
||||||
|
+ "(offset=0x%lx, len=0x%lx)\n",
|
||||||
|
+ __func__, argp->moffset, argp->len);
|
||||||
|
+ ret = EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fuse_reply_err(req, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct fuse_lowlevel_ops lo_oper = {
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,104 @@
|
|||||||
|
From a3f692a36307054148e7db640dc7a64158a98250 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Vivek Goyal <vgoyal@redhat.com>
|
||||||
|
Date: Thu, 30 Aug 2018 14:22:10 -0400
|
||||||
|
Subject: [PATCH 13/29] DAX: virtiofsd: Make setupmapping work only with inode
|
||||||
|
|
||||||
|
The guest might not pass a file handle. In that case, using the inode info, open
|
||||||
|
the file again, mmap() and close fd.
|
||||||
|
|
||||||
|
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
|
||||||
|
With fix from:
|
||||||
|
Signed-off-by: Fotis Xenakis <foxen@windowslive.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.c | 13 ++++++++++--
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++++++++----
|
||||||
|
2 files changed, 43 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
index 1c3790130a..4cfd4c3547 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
@@ -1897,8 +1897,17 @@ static void do_setupmapping(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (req->se->op.setupmapping) {
|
||||||
|
- req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len,
|
||||||
|
- arg->moffset, genflags, &fi);
|
||||||
|
+ /*
|
||||||
|
+ * TODO: Add a flag to request which tells if arg->fh is
|
||||||
|
+ * valid or not.
|
||||||
|
+ */
|
||||||
|
+ if (fi.fh == (uint64_t)-1) {
|
||||||
|
+ req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len,
|
||||||
|
+ arg->moffset, genflags, NULL);
|
||||||
|
+ } else {
|
||||||
|
+ req->se->op.setupmapping(req, nodeid, arg->foffset, arg->len,
|
||||||
|
+ arg->moffset, genflags, &fi);
|
||||||
|
+ }
|
||||||
|
} else {
|
||||||
|
fuse_reply_err(req, ENOSYS);
|
||||||
|
}
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index 056b395574..ebd5a9b215 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2895,11 +2895,19 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset,
|
||||||
|
uint64_t len, uint64_t moffset, uint64_t flags,
|
||||||
|
struct fuse_file_info *fi)
|
||||||
|
{
|
||||||
|
- int ret = 0;
|
||||||
|
+ struct lo_data *lo = lo_data(req);
|
||||||
|
+ int ret = 0, fd, res;
|
||||||
|
VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
uint64_t vhu_flags;
|
||||||
|
+ char *buf;
|
||||||
|
bool writable = flags & O_RDWR;
|
||||||
|
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG,
|
||||||
|
+ "lo_setupmapping(ino=%" PRIu64 ", fi=0x%p,"
|
||||||
|
+ " foffset=%" PRIu64 ", len=%" PRIu64 ", moffset=%" PRIu64
|
||||||
|
+ ", flags=%" PRIu64 ")\n",
|
||||||
|
+ ino, (void *)fi, foffset, len, moffset, flags);
|
||||||
|
+
|
||||||
|
vhu_flags = VHOST_USER_FS_FLAG_MAP_R;
|
||||||
|
if (writable) {
|
||||||
|
vhu_flags |= VHOST_USER_FS_FLAG_MAP_W;
|
||||||
|
@@ -2910,12 +2918,32 @@ static void lo_setupmapping(fuse_req_t req, fuse_ino_t ino, uint64_t foffset,
|
||||||
|
msg.c_offset[0] = moffset;
|
||||||
|
msg.flags[0] = vhu_flags;
|
||||||
|
|
||||||
|
- if (fuse_virtio_map(req, &msg, lo_fi_fd(req, fi))) {
|
||||||
|
- fprintf(stderr, "%s: map over virtio failed (fd=%d)\n", __func__,
|
||||||
|
- (int)fi->fh);
|
||||||
|
+ if (fi) {
|
||||||
|
+ fd = lo_fi_fd(req, fi);
|
||||||
|
+ } else {
|
||||||
|
+ res = asprintf(&buf, "%i", lo_fd(req, ino));
|
||||||
|
+ if (res == -1) {
|
||||||
|
+ return (void)fuse_reply_err(req, errno);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fd = openat(lo->proc_self_fd, buf, flags);
|
||||||
|
+ free(buf);
|
||||||
|
+ if (fd == -1) {
|
||||||
|
+ return (void)fuse_reply_err(req, errno);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (fuse_virtio_map(req, &msg, fd)) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR,
|
||||||
|
+ "%s: map over virtio failed (ino=%" PRId64
|
||||||
|
+ "fd=%d moffset=0x%" PRIx64 ")\n",
|
||||||
|
+ __func__, ino, fi ? (int)fi->fh : lo_fd(req, ino), moffset);
|
||||||
|
ret = EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (!fi) {
|
||||||
|
+ close(fd);
|
||||||
|
+ }
|
||||||
|
fuse_reply_err(req, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,75 @@
|
|||||||
|
From 7c14a24ad467b9404b95345c64e8c5ef5e6d209c Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Fri, 30 Nov 2018 11:47:36 +0000
|
||||||
|
Subject: [PATCH 14/29] DAX: virtiofsd: route se down to destroy method
|
||||||
|
|
||||||
|
We're going to need to pass the session down to destroy so that it can
|
||||||
|
pass it back to do the remove mapping.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.c | 6 +++---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 2 +-
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 2 +-
|
||||||
|
3 files changed, 5 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
index 4cfd4c3547..a2480d4aa1 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
@@ -2211,7 +2211,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
se->got_destroy = 1;
|
||||||
|
se->got_init = 0;
|
||||||
|
if (se->op.destroy) {
|
||||||
|
- se->op.destroy(se->userdata);
|
||||||
|
+ se->op.destroy(se->userdata, se);
|
||||||
|
}
|
||||||
|
|
||||||
|
send_reply_ok(req, NULL, 0);
|
||||||
|
@@ -2438,7 +2438,7 @@ void fuse_session_process_buf_int(struct fuse_session *se,
|
||||||
|
se->got_destroy = 1;
|
||||||
|
se->got_init = 0;
|
||||||
|
if (se->op.destroy) {
|
||||||
|
- se->op.destroy(se->userdata);
|
||||||
|
+ se->op.destroy(se->userdata, se);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
goto reply_err;
|
||||||
|
@@ -2526,7 +2526,7 @@ void fuse_session_destroy(struct fuse_session *se)
|
||||||
|
{
|
||||||
|
if (se->got_init && !se->got_destroy) {
|
||||||
|
if (se->op.destroy) {
|
||||||
|
- se->op.destroy(se->userdata);
|
||||||
|
+ se->op.destroy(se->userdata, se);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pthread_rwlock_destroy(&se->init_rwlock);
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index 2851840cc2..2259623776 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -208,7 +208,7 @@ struct fuse_lowlevel_ops {
|
||||||
|
*
|
||||||
|
* @param userdata the user data passed to fuse_session_new()
|
||||||
|
*/
|
||||||
|
- void (*destroy)(void *userdata);
|
||||||
|
+ void (*destroy)(void *userdata, struct fuse_session *se);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look up a directory entry by name and get its attributes.
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index ebd5a9b215..0d3cda8d2f 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2871,7 +2871,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void lo_destroy(void *userdata)
|
||||||
|
+static void lo_destroy(void *userdata, struct fuse_session *se)
|
||||||
|
{
|
||||||
|
struct lo_data *lo = (struct lo_data *)userdata;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,37 @@
|
|||||||
|
From 72bccc497aeb9057e36477c327e0ac58bc154e6f Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Fri, 30 Nov 2018 11:50:25 +0000
|
||||||
|
Subject: [PATCH 15/29] DAX: virtiofsd: Perform an unmap on destroy
|
||||||
|
|
||||||
|
Force unmap all remaining dax cache entries on a destroy.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 11 +++++++++++
|
||||||
|
1 file changed, 11 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index 0d3cda8d2f..56a4b9404a 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2875,6 +2875,17 @@ static void lo_destroy(void *userdata, struct fuse_session *se)
|
||||||
|
{
|
||||||
|
struct lo_data *lo = (struct lo_data *)userdata;
|
||||||
|
|
||||||
|
+ if (fuse_lowlevel_is_virtio(se)) {
|
||||||
|
+ VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
+
|
||||||
|
+ msg.len[0] = ~(uint64_t)0; /* Special: means 'all' */
|
||||||
|
+ msg.c_offset[0] = 0;
|
||||||
|
+ if (fuse_virtio_unmap(se, &msg)) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR, "%s: unmap during destroy failed\n",
|
||||||
|
+ __func__);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
pthread_mutex_lock(&lo->mutex);
|
||||||
|
while (true) {
|
||||||
|
GHashTableIter iter;
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,34 @@
|
|||||||
|
From c05795e129152533d66f131dd019ae903d1eb39a Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Thu, 2 May 2019 18:04:04 +0100
|
||||||
|
Subject: [PATCH 16/29] DAX: libvhost-user: Allow popping a queue element with
|
||||||
|
bad pointers
|
||||||
|
|
||||||
|
Allow a daemon implemented with libvhost-user to accept an
|
||||||
|
element with pointers to memory that aren't in the mapping table.
|
||||||
|
The daemon might have some special way to deal with some special
|
||||||
|
cases of this.
|
||||||
|
|
||||||
|
The default behaviour doesn't change.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
block/export/vhost-user-blk-server.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
|
||||||
|
index ab2c4d44c4..ea2d302e33 100644
|
||||||
|
--- a/block/export/vhost-user-blk-server.c
|
||||||
|
+++ b/block/export/vhost-user-blk-server.c
|
||||||
|
@@ -205,7 +205,7 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
|
||||||
|
while (1) {
|
||||||
|
VuBlkReq *req;
|
||||||
|
|
||||||
|
- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
|
||||||
|
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq), NULL, NULL);
|
||||||
|
if (!req) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,211 @@
|
|||||||
|
From a238faf5a53668aac037f7ce026d1bf785ee4186 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 11:54:02 +0100
|
||||||
|
Subject: [PATCH 17/29] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO
|
||||||
|
|
||||||
|
Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a
|
||||||
|
client to ask qemu to perform a read/write from an fd directly
|
||||||
|
to GPA.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
docs/interop/vhost-user.rst | 11 +++
|
||||||
|
hw/virtio/trace-events | 6 ++
|
||||||
|
hw/virtio/vhost-user-fs.c | 87 +++++++++++++++++++++++
|
||||||
|
hw/virtio/vhost-user.c | 4 ++
|
||||||
|
include/hw/virtio/vhost-user-fs.h | 1 +
|
||||||
|
subprojects/libvhost-user/libvhost-user.h | 1 +
|
||||||
|
6 files changed, 110 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
|
||||||
|
index 056f94c6fb..8d6ec92881 100644
|
||||||
|
--- a/docs/interop/vhost-user.rst
|
||||||
|
+++ b/docs/interop/vhost-user.rst
|
||||||
|
@@ -1455,6 +1455,17 @@ Slave message types
|
||||||
|
``VHOST_USER_SLAVE_FS_SYNC``
|
||||||
|
[Semantic details TBD]
|
||||||
|
|
||||||
|
+``VHOST_USER_SLAVE_FS_IO``
|
||||||
|
+ :id: 9
|
||||||
|
+ :equivalent ioctl: N/A
|
||||||
|
+ :slave payload: fd + n * (offset + address + len)
|
||||||
|
+ :master payload: N/A
|
||||||
|
+
|
||||||
|
+ Requests that QEMU perform IO directly between an fd and guest memory
|
||||||
|
+ on behalf of the daemon; this is normally for a case where a memory region
|
||||||
|
+ isn't visible to the daemon.
|
||||||
|
+ [Semantic details TBD]
|
||||||
|
+
|
||||||
|
.. _reply_ack:
|
||||||
|
|
||||||
|
VHOST_USER_PROTOCOL_F_REPLY_ACK
|
||||||
|
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
|
||||||
|
index 2060a144a2..a35adf5caf 100644
|
||||||
|
--- a/hw/virtio/trace-events
|
||||||
|
+++ b/hw/virtio/trace-events
|
||||||
|
@@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
|
||||||
|
vhost_vdpa_set_owner(void *dev) "dev: %p"
|
||||||
|
vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
|
||||||
|
|
||||||
|
+# vhost-user-fs.c
|
||||||
|
+
|
||||||
|
+vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd"
|
||||||
|
+vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd"
|
||||||
|
+vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd"
|
||||||
|
+
|
||||||
|
# virtio.c
|
||||||
|
virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
|
||||||
|
virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index 98cec993f7..82a32492a7 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -22,6 +22,8 @@
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
#include "hw/virtio/vhost-user-fs.h"
|
||||||
|
#include "monitor/monitor.h"
|
||||||
|
+#include "exec/address-spaces.h"
|
||||||
|
+#include "trace.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The powerpc kernel code expects the memory to be accessible during
|
||||||
|
@@ -201,6 +203,91 @@ int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
+int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
+ int fd)
|
||||||
|
+{
|
||||||
|
+ VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
+ if (!fs) {
|
||||||
|
+ /* Shouldn't happen - but seen it in error paths */
|
||||||
|
+ fprintf(stderr, "%s: Bad fs ptr\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ unsigned int i;
|
||||||
|
+ int res = 0;
|
||||||
|
+ size_t done = 0;
|
||||||
|
+
|
||||||
|
+ if (fd < 0) {
|
||||||
|
+ fprintf(stderr, "%s: Bad fd for map\n", __func__);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES && !res; i++) {
|
||||||
|
+ if (sm->len[i] == 0) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ size_t len = sm->len[i];
|
||||||
|
+ hwaddr gpa = sm->c_offset[i];
|
||||||
|
+
|
||||||
|
+ while (len && !res) {
|
||||||
|
+ MemoryRegionSection mrs = memory_region_find(get_system_memory(),
|
||||||
|
+ gpa, len);
|
||||||
|
+ size_t mrs_size = (size_t)int128_get64(mrs.size);
|
||||||
|
+
|
||||||
|
+ if (!mrs_size) {
|
||||||
|
+ fprintf(stderr,
|
||||||
|
+ "%s: No guest region found for 0x%" HWADDR_PRIx "\n",
|
||||||
|
+ __func__, gpa);
|
||||||
|
+ res = -EFAULT;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ trace_vhost_user_fs_slave_io_loop(mrs.mr->name,
|
||||||
|
+ (uint64_t)mrs.offset_within_region,
|
||||||
|
+ memory_region_is_ram(mrs.mr),
|
||||||
|
+ memory_region_is_romd(mrs.mr),
|
||||||
|
+ (size_t)mrs_size);
|
||||||
|
+
|
||||||
|
+ void *hostptr = qemu_map_ram_ptr(mrs.mr->ram_block,
|
||||||
|
+ mrs.offset_within_region);
|
||||||
|
+ ssize_t transferred;
|
||||||
|
+ if (sm->flags[i] & VHOST_USER_FS_FLAG_MAP_R) {
|
||||||
|
+ /* Read from file into RAM */
|
||||||
|
+ if (mrs.mr->readonly) {
|
||||||
|
+ res = -EFAULT;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ transferred = pread(fd, hostptr, mrs_size, sm->fd_offset[i]);
|
||||||
|
+ } else {
|
||||||
|
+ /* Write into file from RAM */
|
||||||
|
+ assert((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W));
|
||||||
|
+ transferred = pwrite(fd, hostptr, mrs_size, sm->fd_offset[i]);
|
||||||
|
+ }
|
||||||
|
+ trace_vhost_user_fs_slave_io_loop_res(transferred);
|
||||||
|
+ if (transferred < 0) {
|
||||||
|
+ res = -errno;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ if (!transferred) {
|
||||||
|
+ /* EOF */
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ done += transferred;
|
||||||
|
+ len -= transferred;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ close(fd);
|
||||||
|
+
|
||||||
|
+ trace_vhost_user_fs_slave_io_exit(res, done);
|
||||||
|
+ /*
|
||||||
|
+ * TODO! We should be returning 'done' if possible but our error handling
|
||||||
|
+ * doesn't know about that yet.
|
||||||
|
+ */
|
||||||
|
+ return res;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(vdev);
|
||||||
|
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||||
|
index 757dee0d1e..b4ef0102ad 100644
|
||||||
|
--- a/hw/virtio/vhost-user.c
|
||||||
|
+++ b/hw/virtio/vhost-user.c
|
||||||
|
@@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest {
|
||||||
|
VHOST_USER_SLAVE_FS_MAP = 6,
|
||||||
|
VHOST_USER_SLAVE_FS_UNMAP = 7,
|
||||||
|
VHOST_USER_SLAVE_FS_SYNC = 8,
|
||||||
|
+ VHOST_USER_SLAVE_FS_IO = 9,
|
||||||
|
VHOST_USER_SLAVE_MAX
|
||||||
|
} VhostUserSlaveRequest;
|
||||||
|
|
||||||
|
@@ -1487,6 +1488,9 @@ static void slave_read(void *opaque)
|
||||||
|
case VHOST_USER_SLAVE_FS_SYNC:
|
||||||
|
ret = vhost_user_fs_slave_sync(dev, &payload.fs);
|
||||||
|
break;
|
||||||
|
+ case VHOST_USER_SLAVE_FS_IO:
|
||||||
|
+ ret = vhost_user_fs_slave_io(dev, &payload.fs, fd[0]);
|
||||||
|
+ break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
error_report("Received unexpected msg type: %d.", hdr.request);
|
||||||
|
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
index 69cc6340ed..0750687463 100644
|
||||||
|
--- a/include/hw/virtio/vhost-user-fs.h
|
||||||
|
+++ b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
@@ -68,5 +68,6 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
int fd);
|
||||||
|
int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm);
|
||||||
|
int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm);
|
||||||
|
+int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, int fd);
|
||||||
|
|
||||||
|
#endif /* _QEMU_VHOST_USER_FS_H */
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
index c63a590069..4b6e681a3e 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
@@ -122,6 +122,7 @@ typedef enum VhostUserSlaveRequest {
|
||||||
|
VHOST_USER_SLAVE_FS_MAP = 6,
|
||||||
|
VHOST_USER_SLAVE_FS_UNMAP = 7,
|
||||||
|
VHOST_USER_SLAVE_FS_SYNC = 8,
|
||||||
|
+ VHOST_USER_SLAVE_FS_IO = 9,
|
||||||
|
VHOST_USER_SLAVE_MAX
|
||||||
|
} VhostUserSlaveRequest;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,98 @@
|
|||||||
|
From 5e3aff71f01f41254cdc7ecefc98a31be002dda0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 12:17:36 +0100
|
||||||
|
Subject: [PATCH 18/29] DAX/unmap virtiofsd: Add wrappers for
|
||||||
|
VHOST_USER_SLAVE_FS_IO
|
||||||
|
|
||||||
|
Add a wrapper to send VHOST_USER_SLAVE_FS_IO commands and a
|
||||||
|
further wrapper for sending a fuse_buf write using the FS_IO
|
||||||
|
slave command.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 24 +++++++++++++++++++++
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 38 +++++++++++++++++++++++++++++++++
|
||||||
|
2 files changed, 62 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index 2259623776..866d122352 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -2022,4 +2022,28 @@ int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg);
|
||||||
|
*/
|
||||||
|
int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg);
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * For use with virtio-fs; request IO directly to memory
|
||||||
|
+ *
|
||||||
|
+ * @param se The current session
|
||||||
|
+ * @param msg A set of IO requests
|
||||||
|
+ * @param fd The fd to perform the IO on
|
||||||
|
+ * @return Zero on success
|
||||||
|
+ */
|
||||||
|
+int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd);
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * For use with virtio-fs; wrapper for fuse_virtio_io for writes
|
||||||
|
+ * from memory to an fd
|
||||||
|
+ * @param req The request that triggered this action
|
||||||
|
+ * @param dst The destination (file) memory buffer
|
||||||
|
+ * @param dst_off Byte offset in the file
|
||||||
|
+ * @param src The source (memory) buffer
|
||||||
|
+ * @param src_off The GPA
|
||||||
|
+ * @param len Length in bytes
|
||||||
|
+ */
|
||||||
|
+ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst,
|
||||||
|
+ size_t dst_off, const struct fuse_buf *src,
|
||||||
|
+ size_t src_off, size_t len);
|
||||||
|
+
|
||||||
|
#endif /* FUSE_LOWLEVEL_H_ */
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index 24d9323665..abac0d0d2e 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -1071,3 +1071,41 @@ int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg)
|
||||||
|
return !vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
VHOST_USER_SLAVE_FS_SYNC, -1, msg);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd)
|
||||||
|
+{
|
||||||
|
+ if (!se->virtio_dev) {
|
||||||
|
+ return -ENODEV;
|
||||||
|
+ }
|
||||||
|
+ return !vu_fs_cache_request(&se->virtio_dev->dev,
|
||||||
|
+ VHOST_USER_SLAVE_FS_IO, fd, msg);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Write to a file (dst) from an area of guest GPA (src) that probably
|
||||||
|
+ * isn't visible to the daemon.
|
||||||
|
+ */
|
||||||
|
+ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst,
|
||||||
|
+ size_t dst_off, const struct fuse_buf *src,
|
||||||
|
+ size_t src_off, size_t len)
|
||||||
|
+{
|
||||||
|
+ VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
+
|
||||||
|
+ if (dst->flags & FUSE_BUF_FD_SEEK) {
|
||||||
|
+ msg.fd_offset[0] = dst->pos + dst_off;
|
||||||
|
+ } else {
|
||||||
|
+ off_t cur = lseek(dst->fd, 0, SEEK_CUR);
|
||||||
|
+ if (cur == (off_t)-1) {
|
||||||
|
+ return -errno;
|
||||||
|
+ }
|
||||||
|
+ msg.fd_offset[0] = cur;
|
||||||
|
+ }
|
||||||
|
+ msg.c_offset[0] = (uintptr_t)src->mem + src_off;
|
||||||
|
+ msg.len[0] = len;
|
||||||
|
+ msg.flags[0] = VHOST_USER_FS_FLAG_MAP_W;
|
||||||
|
+
|
||||||
|
+ bool result = !fuse_virtio_io(req->se, &msg, dst->fd);
|
||||||
|
+ /* TODO: Rework the result path to actually get length/error */
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG, "%s: result=%d\n", __func__, result);
|
||||||
|
+ return result ? len : -EIO;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,335 @@
|
|||||||
|
From 1586d4a5525f44c51cbcbd5004b9a79bfc8c495c Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 13:26:09 +0100
|
||||||
|
Subject: [PATCH 19/29] DAX/unmap virtiofsd: Parse unmappable elements
|
||||||
|
|
||||||
|
For some read/writes the virtio queue elements are unmappable by
|
||||||
|
the daemon; these are cases where the data is to be read/written
|
||||||
|
from non-RAM. In virtiofs's case this is typically a direct read/write
|
||||||
|
into an mmap'd DAX file also on virtiofs (possibly on another instance).
|
||||||
|
|
||||||
|
When we receive a virtio queue element, check that we have enough
|
||||||
|
mappable data to handle the headers. Make a note of the number of
|
||||||
|
unmappable 'in' entries (ie. for read data back to the VMM),
|
||||||
|
and flag the fuse_bufvec for 'out' entries with a new flag
|
||||||
|
FUSE_BUF_PHYS_ADDR.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
with fix by:
|
||||||
|
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/buffer.c | 4 +-
|
||||||
|
tools/virtiofsd/fuse_common.h | 7 ++
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 191 ++++++++++++++++++++++++----------
|
||||||
|
3 files changed, 145 insertions(+), 57 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
|
||||||
|
index 874f01c488..1a050aa441 100644
|
||||||
|
--- a/tools/virtiofsd/buffer.c
|
||||||
|
+++ b/tools/virtiofsd/buffer.c
|
||||||
|
@@ -77,6 +77,7 @@ static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off,
|
||||||
|
ssize_t res = 0;
|
||||||
|
size_t copied = 0;
|
||||||
|
|
||||||
|
+ assert(!(src->flags & FUSE_BUF_PHYS_ADDR));
|
||||||
|
while (len) {
|
||||||
|
if (dst->flags & FUSE_BUF_FD_SEEK) {
|
||||||
|
res = pwrite(dst->fd, (char *)src->mem + src_off, len,
|
||||||
|
@@ -272,7 +273,8 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
|
||||||
|
* process
|
||||||
|
*/
|
||||||
|
for (i = 0; i < srcv->count; i++) {
|
||||||
|
- if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
|
||||||
|
+ if ((srcv->buf[i].flags & FUSE_BUF_PHYS_ADDR) ||
|
||||||
|
+ (srcv->buf[i].flags & FUSE_BUF_IS_FD)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
|
||||||
|
index a090040bb2..ed9280de91 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_common.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_common.h
|
||||||
|
@@ -611,6 +611,13 @@ enum fuse_buf_flags {
|
||||||
|
* detected.
|
||||||
|
*/
|
||||||
|
FUSE_BUF_FD_RETRY = (1 << 3),
|
||||||
|
+
|
||||||
|
+ /**
|
||||||
|
+ * The addresses in the iovec represent guest physical addresses
|
||||||
|
+ * that can't be mapped by the daemon process.
|
||||||
|
+ * IO must be bounced back to the VMM to do it.
|
||||||
|
+ */
|
||||||
|
+ FUSE_BUF_PHYS_ADDR = (1 << 4),
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index abac0d0d2e..31f17ab043 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -49,6 +49,10 @@ typedef struct {
|
||||||
|
VuVirtqElement elem;
|
||||||
|
struct fuse_chan ch;
|
||||||
|
|
||||||
|
+ /* Number of unmappable iovecs */
|
||||||
|
+ unsigned bad_in_num;
|
||||||
|
+ unsigned bad_out_num;
|
||||||
|
+
|
||||||
|
/* Used to complete requests that involve no reply */
|
||||||
|
bool reply_sent;
|
||||||
|
} FVRequest;
|
||||||
|
@@ -291,8 +295,10 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
|
||||||
|
/* The 'in' part of the elem is to qemu */
|
||||||
|
unsigned int in_num = elem->in_num;
|
||||||
|
+ unsigned int bad_in_num = req->bad_in_num;
|
||||||
|
struct iovec *in_sg = elem->in_sg;
|
||||||
|
size_t in_len = iov_size(in_sg, in_num);
|
||||||
|
+ size_t in_len_writeable = iov_size(in_sg, in_num - bad_in_num);
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
|
||||||
|
__func__, elem->index, in_num, in_len);
|
||||||
|
|
||||||
|
@@ -300,7 +306,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
* The elem should have room for a 'fuse_out_header' (out from fuse)
|
||||||
|
* plus the data based on the len in the header.
|
||||||
|
*/
|
||||||
|
- if (in_len < sizeof(struct fuse_out_header)) {
|
||||||
|
+ if (in_len_writeable < sizeof(struct fuse_out_header)) {
|
||||||
|
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
|
||||||
|
__func__, elem->index);
|
||||||
|
ret = E2BIG;
|
||||||
|
@@ -327,7 +333,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
|
||||||
|
/* These get updated as we skip */
|
||||||
|
struct iovec *in_sg_ptr = in_sg_cpy;
|
||||||
|
- int in_sg_cpy_count = in_num;
|
||||||
|
+ int in_sg_cpy_count = in_num - bad_in_num;
|
||||||
|
|
||||||
|
/* skip over parts of in_sg that contained the header iov */
|
||||||
|
size_t skip_size = iov_len;
|
||||||
|
@@ -460,17 +466,21 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
|
||||||
|
|
||||||
|
/* The 'out' part of the elem is from qemu */
|
||||||
|
unsigned int out_num = elem->out_num;
|
||||||
|
+ unsigned int out_num_readable = out_num - req->bad_out_num;
|
||||||
|
struct iovec *out_sg = elem->out_sg;
|
||||||
|
size_t out_len = iov_size(out_sg, out_num);
|
||||||
|
+ size_t out_len_readable = iov_size(out_sg, out_num_readable);
|
||||||
|
fuse_log(FUSE_LOG_DEBUG,
|
||||||
|
- "%s: elem %d: with %d out desc of length %zd\n",
|
||||||
|
- __func__, elem->index, out_num, out_len);
|
||||||
|
+ "%s: elem %d: with %d out desc of length %zd"
|
||||||
|
+ " bad_in_num=%u bad_out_num=%u\n",
|
||||||
|
+ __func__, elem->index, out_num, out_len, req->bad_in_num,
|
||||||
|
+ req->bad_out_num);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The elem should contain a 'fuse_in_header' (in to fuse)
|
||||||
|
* plus the data based on the len in the header.
|
||||||
|
*/
|
||||||
|
- if (out_len < sizeof(struct fuse_in_header)) {
|
||||||
|
+ if (out_len_readable < sizeof(struct fuse_in_header)) {
|
||||||
|
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
|
||||||
|
__func__, elem->index);
|
||||||
|
assert(0); /* TODO */
|
||||||
|
@@ -484,63 +494,129 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
|
||||||
|
copy_from_iov(&fbuf, 1, out_sg);
|
||||||
|
|
||||||
|
pbufv = NULL; /* Compiler thinks an unitialised path */
|
||||||
|
- if (out_num > 2 &&
|
||||||
|
- out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
|
||||||
|
- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
|
||||||
|
- out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
|
||||||
|
- /*
|
||||||
|
- * For a write we don't actually need to copy the
|
||||||
|
- * data, we can just do it straight out of guest memory
|
||||||
|
- * but we must still copy the headers in case the guest
|
||||||
|
- * was nasty and changed them while we were using them.
|
||||||
|
- */
|
||||||
|
- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
|
||||||
|
-
|
||||||
|
- /* copy the fuse_write_in header afte rthe fuse_in_header */
|
||||||
|
- fbuf.mem += out_sg->iov_len;
|
||||||
|
- copy_from_iov(&fbuf, 1, out_sg + 1);
|
||||||
|
- fbuf.mem -= out_sg->iov_len;
|
||||||
|
- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
|
||||||
|
-
|
||||||
|
- /* Allocate the bufv, with space for the rest of the iov */
|
||||||
|
- pbufv = malloc(sizeof(struct fuse_bufvec) +
|
||||||
|
- sizeof(struct fuse_buf) * (out_num - 2));
|
||||||
|
- if (!pbufv) {
|
||||||
|
- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
|
||||||
|
- __func__);
|
||||||
|
- goto out;
|
||||||
|
- }
|
||||||
|
+ if (req->bad_in_num || req->bad_out_num) {
|
||||||
|
+ bool handled_unmappable = false;
|
||||||
|
+
|
||||||
|
+ if (out_num > 2 && out_num_readable >= 2 && !req->bad_in_num &&
|
||||||
|
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
|
||||||
|
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
|
||||||
|
+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
|
||||||
|
+ handled_unmappable = true;
|
||||||
|
+
|
||||||
|
+ /* copy the fuse_write_in header after fuse_in_header */
|
||||||
|
+ fbuf.mem += out_sg->iov_len;
|
||||||
|
+ copy_from_iov(&fbuf, 1, out_sg + 1);
|
||||||
|
+ fbuf.mem -= out_sg->iov_len;
|
||||||
|
+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
|
||||||
|
+
|
||||||
|
+ /* Allocate the bufv, with space for the rest of the iov */
|
||||||
|
+ pbufv = malloc(sizeof(struct fuse_bufvec) +
|
||||||
|
+ sizeof(struct fuse_buf) * (out_num - 2));
|
||||||
|
+ if (!pbufv) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
|
||||||
|
+ __func__);
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- allocated_bufv = true;
|
||||||
|
- pbufv->count = 1;
|
||||||
|
- pbufv->buf[0] = fbuf;
|
||||||
|
+ allocated_bufv = true;
|
||||||
|
+ pbufv->count = 1;
|
||||||
|
+ pbufv->buf[0] = fbuf;
|
||||||
|
+
|
||||||
|
+ size_t iovindex, pbufvindex;
|
||||||
|
+ iovindex = 2; /* 2 headers, separate iovs */
|
||||||
|
+ pbufvindex = 1; /* 2 headers, 1 fusebuf */
|
||||||
|
+
|
||||||
|
+ for (; iovindex < out_num; iovindex++, pbufvindex++) {
|
||||||
|
+ pbufv->count++;
|
||||||
|
+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
|
||||||
|
+ pbufv->buf[pbufvindex].flags =
|
||||||
|
+ (iovindex < out_num_readable) ? 0 :
|
||||||
|
+ FUSE_BUF_PHYS_ADDR;
|
||||||
|
+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
|
||||||
|
+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- size_t iovindex, pbufvindex;
|
||||||
|
- iovindex = 2; /* 2 headers, separate iovs */
|
||||||
|
- pbufvindex = 1; /* 2 headers, 1 fusebuf */
|
||||||
|
+ if (out_num == 2 && out_num_readable == 2 && req->bad_in_num &&
|
||||||
|
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
|
||||||
|
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_READ &&
|
||||||
|
+ out_sg[1].iov_len == sizeof(struct fuse_read_in)) {
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG,
|
||||||
|
+ "Unmappable read case "
|
||||||
|
+ "in_num=%d bad_in_num=%d\n",
|
||||||
|
+ elem->in_num, req->bad_in_num);
|
||||||
|
+ handled_unmappable = true;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- for (; iovindex < out_num; iovindex++, pbufvindex++) {
|
||||||
|
- pbufv->count++;
|
||||||
|
- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
|
||||||
|
- pbufv->buf[pbufvindex].flags = 0;
|
||||||
|
- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
|
||||||
|
- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
|
||||||
|
+ if (!handled_unmappable) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR,
|
||||||
|
+ "Unhandled unmappable element: out: %d(b:%d) in: "
|
||||||
|
+ "%d(b:%d)",
|
||||||
|
+ out_num, req->bad_out_num, elem->in_num, req->bad_in_num);
|
||||||
|
+ fv_panic(dev, "Unhandled unmappable element");
|
||||||
|
}
|
||||||
|
- } else {
|
||||||
|
- /* Normal (non fast write) path */
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!req->bad_out_num) {
|
||||||
|
+ if (out_num > 2 &&
|
||||||
|
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
|
||||||
|
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
|
||||||
|
+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
|
||||||
|
+ /*
|
||||||
|
+ * For a write we don't actually need to copy the
|
||||||
|
+ * data, we can just do it straight out of guest memory
|
||||||
|
+ * but we must still copy the headers in case the guest
|
||||||
|
+ * was nasty and changed them while we were using them.
|
||||||
|
+ */
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n",
|
||||||
|
+ __func__);
|
||||||
|
+
|
||||||
|
+ /* copy the fuse_write_in header after fuse_in_header */
|
||||||
|
+ fbuf.mem += out_sg->iov_len;
|
||||||
|
+ copy_from_iov(&fbuf, 1, out_sg + 1);
|
||||||
|
+ fbuf.mem -= out_sg->iov_len;
|
||||||
|
+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
|
||||||
|
+
|
||||||
|
+ /* Allocate the bufv, with space for the rest of the iov */
|
||||||
|
+ pbufv = malloc(sizeof(struct fuse_bufvec) +
|
||||||
|
+ sizeof(struct fuse_buf) * (out_num - 2));
|
||||||
|
+ if (!pbufv) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
|
||||||
|
+ __func__);
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- /* Copy the rest of the buffer */
|
||||||
|
- fbuf.mem += out_sg->iov_len;
|
||||||
|
- copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
|
||||||
|
- fbuf.mem -= out_sg->iov_len;
|
||||||
|
- fbuf.size = out_len;
|
||||||
|
+ allocated_bufv = true;
|
||||||
|
+ pbufv->count = 1;
|
||||||
|
+ pbufv->buf[0] = fbuf;
|
||||||
|
|
||||||
|
- /* TODO! Endianness of header */
|
||||||
|
+ size_t iovindex, pbufvindex;
|
||||||
|
+ iovindex = 2; /* 2 headers, separate iovs */
|
||||||
|
+ pbufvindex = 1; /* 2 headers, 1 fusebuf */
|
||||||
|
|
||||||
|
- /* TODO: Add checks for fuse_session_exited */
|
||||||
|
- bufv.buf[0] = fbuf;
|
||||||
|
- bufv.count = 1;
|
||||||
|
- pbufv = &bufv;
|
||||||
|
+ for (; iovindex < out_num; iovindex++, pbufvindex++) {
|
||||||
|
+ pbufv->count++;
|
||||||
|
+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
|
||||||
|
+ pbufv->buf[pbufvindex].flags = 0;
|
||||||
|
+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
|
||||||
|
+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
|
||||||
|
+ }
|
||||||
|
+ } else {
|
||||||
|
+ /* Normal (non fast write) path */
|
||||||
|
+
|
||||||
|
+ /* Copy the rest of the buffer */
|
||||||
|
+ fbuf.mem += out_sg->iov_len;
|
||||||
|
+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
|
||||||
|
+ fbuf.mem -= out_sg->iov_len;
|
||||||
|
+ fbuf.size = out_len;
|
||||||
|
+
|
||||||
|
+ /* TODO! Endianness of header */
|
||||||
|
+
|
||||||
|
+ /* TODO: Add checks for fuse_session_exited */
|
||||||
|
+ bufv.buf[0] = fbuf;
|
||||||
|
+ bufv.count = 1;
|
||||||
|
+ pbufv = &bufv;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
pbufv->idx = 0;
|
||||||
|
pbufv->off = 0;
|
||||||
|
@@ -657,13 +733,16 @@ static void *fv_queue_thread(void *opaque)
|
||||||
|
__func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
+ unsigned int bad_in_num = 0, bad_out_num = 0;
|
||||||
|
FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest),
|
||||||
|
- NULL, NULL);
|
||||||
|
+ &bad_in_num, &bad_out_num);
|
||||||
|
if (!req) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
req->reply_sent = false;
|
||||||
|
+ req->bad_in_num = bad_in_num;
|
||||||
|
+ req->bad_out_num = bad_out_num;
|
||||||
|
|
||||||
|
if (!se->thread_pool_size) {
|
||||||
|
req_list = g_list_prepend(req_list, req);
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,56 @@
|
|||||||
|
From 1f6a9f8567bdf2be00d217abac33a71248541a4a Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 13:26:51 +0100
|
||||||
|
Subject: [PATCH 20/29] DAX/unmap virtiofsd: Route unmappable reads
|
||||||
|
|
||||||
|
When a read with unmappable buffers is found, map it to a slave
|
||||||
|
read command.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 29 +++++++++++++++++++++++++++++
|
||||||
|
1 file changed, 29 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index 31f17ab043..1f4c7fff35 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -397,6 +397,35 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
in_sg_left -= ret;
|
||||||
|
len -= ret;
|
||||||
|
} while (in_sg_left);
|
||||||
|
+
|
||||||
|
+ if (bad_in_num) {
|
||||||
|
+ while (len && bad_in_num) {
|
||||||
|
+ VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
+ msg.flags[0] = VHOST_USER_FS_FLAG_MAP_R;
|
||||||
|
+ msg.fd_offset[0] = buf->buf[0].pos;
|
||||||
|
+ msg.c_offset[0] = (uint64_t)(uintptr_t)in_sg_ptr[0].iov_base;
|
||||||
|
+ msg.len[0] = in_sg_ptr[0].iov_len;
|
||||||
|
+ if (len < msg.len[0]) {
|
||||||
|
+ msg.len[0] = len;
|
||||||
|
+ }
|
||||||
|
+ bool req_res = !fuse_virtio_io(se, &msg, buf->buf[0].fd);
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG,
|
||||||
|
+ "%s: bad loop; len=%zd bad_in_num=%d fd_offset=%zd "
|
||||||
|
+ "c_offset=%p req_res=%d\n",
|
||||||
|
+ __func__, len, bad_in_num, buf->buf[0].pos,
|
||||||
|
+ in_sg_ptr[0].iov_base, req_res);
|
||||||
|
+ if (req_res) {
|
||||||
|
+ len -= msg.len[0];
|
||||||
|
+ buf->buf[0].pos += msg.len[0];
|
||||||
|
+ in_sg_ptr++;
|
||||||
|
+ bad_in_num--;
|
||||||
|
+ } else {
|
||||||
|
+ ret = EIO;
|
||||||
|
+ free(in_sg_cpy);
|
||||||
|
+ goto err;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
free(in_sg_cpy);
|
||||||
|
|
||||||
|
/* Need to fix out->len on EOF */
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,121 @@
|
|||||||
|
From e291b7766f49b06933afed374b6476416d951517 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 13:18:42 +0100
|
||||||
|
Subject: [PATCH 21/29] DAX/unmap virtiofsd: route unmappable write to slave
|
||||||
|
command
|
||||||
|
|
||||||
|
When a fuse_buf_copy is performed on an element with FUSE_BUF_PHYS_ADDR
|
||||||
|
route it to a fuse_virtio_write request that does a slave command to
|
||||||
|
perform the write.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/buffer.c | 14 +++++++++++---
|
||||||
|
tools/virtiofsd/fuse_common.h | 6 +++++-
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 3 ---
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 2 +-
|
||||||
|
4 files changed, 17 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
|
||||||
|
index 1a050aa441..8135d52d2a 100644
|
||||||
|
--- a/tools/virtiofsd/buffer.c
|
||||||
|
+++ b/tools/virtiofsd/buffer.c
|
||||||
|
@@ -200,13 +200,20 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off,
|
||||||
|
return copied;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off,
|
||||||
|
+static ssize_t fuse_buf_copy_one(fuse_req_t req,
|
||||||
|
+ const struct fuse_buf *dst, size_t dst_off,
|
||||||
|
const struct fuse_buf *src, size_t src_off,
|
||||||
|
size_t len)
|
||||||
|
{
|
||||||
|
int src_is_fd = src->flags & FUSE_BUF_IS_FD;
|
||||||
|
int dst_is_fd = dst->flags & FUSE_BUF_IS_FD;
|
||||||
|
+ int src_is_phys = src->flags & FUSE_BUF_PHYS_ADDR;
|
||||||
|
+ int dst_is_phys = src->flags & FUSE_BUF_PHYS_ADDR;
|
||||||
|
|
||||||
|
+ if (src_is_phys && !src_is_fd && dst_is_fd) {
|
||||||
|
+ return fuse_virtio_write(req, dst, dst_off, src, src_off, len);
|
||||||
|
+ }
|
||||||
|
+ assert(!src_is_phys && !dst_is_phys);
|
||||||
|
if (!src_is_fd && !dst_is_fd) {
|
||||||
|
char *dstmem = (char *)dst->mem + dst_off;
|
||||||
|
char *srcmem = (char *)src->mem + src_off;
|
||||||
|
@@ -259,7 +266,8 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
|
||||||
|
+ssize_t fuse_buf_copy(fuse_req_t req, struct fuse_bufvec *dstv,
|
||||||
|
+ struct fuse_bufvec *srcv)
|
||||||
|
{
|
||||||
|
size_t copied = 0, i;
|
||||||
|
|
||||||
|
@@ -301,7 +309,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
|
||||||
|
dst_len = dst->size - dstv->off;
|
||||||
|
len = min_size(src_len, dst_len);
|
||||||
|
|
||||||
|
- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len);
|
||||||
|
+ res = fuse_buf_copy_one(req, dst, dstv->off, src, srcv->off, len);
|
||||||
|
if (res < 0) {
|
||||||
|
if (!copied) {
|
||||||
|
return res;
|
||||||
|
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
|
||||||
|
index ed9280de91..05d56883dd 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_common.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_common.h
|
||||||
|
@@ -495,6 +495,8 @@ struct fuse_conn_info {
|
||||||
|
struct fuse_session;
|
||||||
|
struct fuse_pollhandle;
|
||||||
|
struct fuse_conn_info_opts;
|
||||||
|
+struct fuse_req;
|
||||||
|
+typedef struct fuse_req *fuse_req_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function parses several command-line options that can be used
|
||||||
|
@@ -713,11 +715,13 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv);
|
||||||
|
/**
|
||||||
|
* Copy data from one buffer vector to another
|
||||||
|
*
|
||||||
|
+ * @param req The request this copy is part of
|
||||||
|
* @param dst destination buffer vector
|
||||||
|
* @param src source buffer vector
|
||||||
|
* @return actual number of bytes copied or -errno on error
|
||||||
|
*/
|
||||||
|
-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
|
||||||
|
+ssize_t fuse_buf_copy(fuse_req_t req,
|
||||||
|
+ struct fuse_bufvec *dst, struct fuse_bufvec *src);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Memory buffer iterator
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index 866d122352..e543f64177 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -42,9 +42,6 @@
|
||||||
|
/** Inode number type */
|
||||||
|
typedef uint64_t fuse_ino_t;
|
||||||
|
|
||||||
|
-/** Request pointer type */
|
||||||
|
-typedef struct fuse_req *fuse_req_t;
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* Session
|
||||||
|
*
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index 56a4b9404a..ab33fabcda 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2063,7 +2063,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- res = fuse_buf_copy(&out_buf, in_buf);
|
||||||
|
+ res = fuse_buf_copy(req, &out_buf, in_buf);
|
||||||
|
if (res < 0) {
|
||||||
|
fuse_reply_err(req, -res);
|
||||||
|
} else {
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,350 @@
|
|||||||
|
From 2a64df420827ff0b127a30f2ac877a7b1ded925b Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 18:08:41 +0100
|
||||||
|
Subject: [PATCH 22/29] DAX: vhost-user: Rework slave return values
|
||||||
|
|
||||||
|
All the current slave handlers on the qemu side generate an 'int'
|
||||||
|
return value that's squashed down to a bool (!!ret) and stuffed into
|
||||||
|
a uint64_t (field of a union) to be returned.
|
||||||
|
|
||||||
|
Move the uint64_t type back up through the individual handlers so
|
||||||
|
that we can make one actually return a full uint64_t.
|
||||||
|
|
||||||
|
Note that the definition in the interop spec says most of these
|
||||||
|
cases are defined as returning 0 on success and non-0 for failure,
|
||||||
|
so it's OK to change from a bool to another non-0.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
hw/virtio/vhost-backend.c | 4 +--
|
||||||
|
hw/virtio/vhost-user-fs.c | 42 ++++++++++++++++---------------
|
||||||
|
hw/virtio/vhost-user.c | 32 ++++++++++++-----------
|
||||||
|
include/hw/virtio/vhost-backend.h | 2 +-
|
||||||
|
include/hw/virtio/vhost-user-fs.h | 13 ++++++----
|
||||||
|
5 files changed, 50 insertions(+), 43 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
|
||||||
|
index 222bbcc62d..e81083ddda 100644
|
||||||
|
--- a/hw/virtio/vhost-backend.c
|
||||||
|
+++ b/hw/virtio/vhost-backend.c
|
||||||
|
@@ -401,7 +401,7 @@ int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
|
||||||
|
+uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
|
||||||
|
struct vhost_iotlb_msg *imsg)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
@@ -424,5 +424,5 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- return ret;
|
||||||
|
+ return !!ret;
|
||||||
|
}
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index 82a32492a7..c02dcaeca7 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -35,19 +35,19 @@
|
||||||
|
#define DAX_WINDOW_PROT PROT_NONE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
- int fd)
|
||||||
|
+uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
+ int fd)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
if (!fs) {
|
||||||
|
/* Shouldn't happen - but seen on error path */
|
||||||
|
fprintf(stderr, "%s: Bad fs ptr\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
size_t cache_size = fs->conf.cache_size;
|
||||||
|
if (!cache_size) {
|
||||||
|
fprintf(stderr, "%s: map when DAX cache not present\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
void *cache_host = memory_region_get_ram_ptr(&fs->cache);
|
||||||
|
|
||||||
|
@@ -56,7 +56,7 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
|
||||||
|
if (fd < 0) {
|
||||||
|
fprintf(stderr, "%s: Bad fd for map\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES; i++) {
|
||||||
|
@@ -78,11 +78,11 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
((sm->flags[i] & VHOST_USER_FS_FLAG_MAP_W) ? PROT_WRITE : 0),
|
||||||
|
MAP_SHARED | MAP_FIXED,
|
||||||
|
fd, sm->fd_offset[i]) != (cache_host + sm->c_offset[i])) {
|
||||||
|
+ res = -errno;
|
||||||
|
fprintf(stderr, "%s: map failed err %d [%d] %"
|
||||||
|
PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__,
|
||||||
|
errno, i, sm->c_offset[i], sm->len[i],
|
||||||
|
sm->fd_offset[i]);
|
||||||
|
- res = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -91,10 +91,11 @@ int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
/* Something went wrong, unmap them all */
|
||||||
|
vhost_user_fs_slave_unmap(dev, sm);
|
||||||
|
}
|
||||||
|
- return res;
|
||||||
|
+ return (uint64_t)res;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
+uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev,
|
||||||
|
+ VhostUserFSSlaveMsg *sm)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
if (!fs) {
|
||||||
|
@@ -114,7 +115,7 @@ int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: unmap when DAX cache not present\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
void *cache_host = memory_region_get_ram_ptr(&fs->cache);
|
||||||
|
|
||||||
|
@@ -148,26 +149,27 @@ int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
ptr = mmap(cache_host + sm->c_offset[i], sm->len[i], DAX_WINDOW_PROT,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
||||||
|
if (ptr != (cache_host + sm->c_offset[i])) {
|
||||||
|
+ res = -errno;
|
||||||
|
fprintf(stderr, "%s: mmap failed (%s) [%d] %"
|
||||||
|
PRIx64 "+%" PRIx64 " from %" PRIx64 " res: %p\n",
|
||||||
|
__func__,
|
||||||
|
strerror(errno),
|
||||||
|
i, sm->c_offset[i], sm->len[i],
|
||||||
|
sm->fd_offset[i], ptr);
|
||||||
|
- res = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- return res;
|
||||||
|
+ return (uint64_t)res;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
+uint64_t vhost_user_fs_slave_sync(struct vhost_dev *dev,
|
||||||
|
+ VhostUserFSSlaveMsg *sm)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
size_t cache_size = fs->conf.cache_size;
|
||||||
|
if (!cache_size) {
|
||||||
|
fprintf(stderr, "%s: sync when DAX cache not present\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
void *cache_host = memory_region_get_ram_ptr(&fs->cache);
|
||||||
|
|
||||||
|
@@ -191,26 +193,26 @@ int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm)
|
||||||
|
|
||||||
|
if (msync(cache_host + sm->c_offset[i], sm->len[i],
|
||||||
|
MS_SYNC /* ?? */)) {
|
||||||
|
+ res = -errno;
|
||||||
|
fprintf(stderr, "%s: msync failed (%s) [%d] %"
|
||||||
|
PRIx64 "+%" PRIx64 " from %" PRIx64 "\n", __func__,
|
||||||
|
strerror(errno),
|
||||||
|
i, sm->c_offset[i], sm->len[i],
|
||||||
|
sm->fd_offset[i]);
|
||||||
|
- res = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- return res;
|
||||||
|
+ return (uint64_t)res;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
- int fd)
|
||||||
|
+uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
+ int fd)
|
||||||
|
{
|
||||||
|
VHostUserFS *fs = VHOST_USER_FS(dev->vdev);
|
||||||
|
if (!fs) {
|
||||||
|
/* Shouldn't happen - but seen it in error paths */
|
||||||
|
fprintf(stderr, "%s: Bad fs ptr\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int i;
|
||||||
|
@@ -219,7 +221,7 @@ int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
|
||||||
|
if (fd < 0) {
|
||||||
|
fprintf(stderr, "%s: Bad fd for map\n", __func__);
|
||||||
|
- return -1;
|
||||||
|
+ return (uint64_t)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < VHOST_USER_FS_SLAVE_ENTRIES && !res; i++) {
|
||||||
|
@@ -285,7 +287,7 @@ int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
* TODO! We should be returning 'done' if possible but our error handling
|
||||||
|
* doesn't know about that yet.
|
||||||
|
*/
|
||||||
|
- return res;
|
||||||
|
+ return (uint64_t)res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||||
|
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
|
||||||
|
index b4ef0102ad..d95dbc39e3 100644
|
||||||
|
--- a/hw/virtio/vhost-user.c
|
||||||
|
+++ b/hw/virtio/vhost-user.c
|
||||||
|
@@ -1325,24 +1325,25 @@ static int vhost_user_reset_device(struct vhost_dev *dev)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
|
||||||
|
+static uint64_t vhost_user_slave_handle_config_change(struct vhost_dev *dev)
|
||||||
|
{
|
||||||
|
int ret = -1;
|
||||||
|
|
||||||
|
if (!dev->config_ops) {
|
||||||
|
- return -1;
|
||||||
|
+ return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dev->config_ops->vhost_dev_config_notifier) {
|
||||||
|
ret = dev->config_ops->vhost_dev_config_notifier(dev);
|
||||||
|
}
|
||||||
|
|
||||||
|
- return ret;
|
||||||
|
+ return !!ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
|
||||||
|
- VhostUserVringArea *area,
|
||||||
|
- int fd)
|
||||||
|
+static uint64_t vhost_user_slave_handle_vring_host_notifier(
|
||||||
|
+ struct vhost_dev *dev,
|
||||||
|
+ VhostUserVringArea *area,
|
||||||
|
+ int fd)
|
||||||
|
{
|
||||||
|
int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
|
||||||
|
size_t page_size = qemu_real_host_page_size;
|
||||||
|
@@ -1356,7 +1357,7 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
|
||||||
|
if (!virtio_has_feature(dev->protocol_features,
|
||||||
|
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
|
||||||
|
vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
|
||||||
|
- return -1;
|
||||||
|
+ return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
n = &user->notifier[queue_idx];
|
||||||
|
@@ -1369,18 +1370,18 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
|
||||||
|
- return 0;
|
||||||
|
+ return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sanity check. */
|
||||||
|
if (area->size != page_size) {
|
||||||
|
- return -1;
|
||||||
|
+ return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||||
|
fd, area->offset);
|
||||||
|
if (addr == MAP_FAILED) {
|
||||||
|
- return -1;
|
||||||
|
+ return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
|
||||||
|
@@ -1391,13 +1392,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
|
||||||
|
|
||||||
|
if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
|
||||||
|
munmap(addr, page_size);
|
||||||
|
- return -1;
|
||||||
|
+ return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
n->addr = addr;
|
||||||
|
n->set = true;
|
||||||
|
|
||||||
|
- return 0;
|
||||||
|
+ return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void slave_read(void *opaque)
|
||||||
|
@@ -1406,7 +1407,8 @@ static void slave_read(void *opaque)
|
||||||
|
struct vhost_user *u = dev->opaque;
|
||||||
|
VhostUserHeader hdr = { 0, };
|
||||||
|
VhostUserPayload payload = { 0, };
|
||||||
|
- int size, ret = 0;
|
||||||
|
+ int size;
|
||||||
|
+ uint64_t ret = 0;
|
||||||
|
struct iovec iov;
|
||||||
|
struct msghdr msgh;
|
||||||
|
int fd[VHOST_USER_SLAVE_MAX_FDS];
|
||||||
|
@@ -1494,7 +1496,7 @@ static void slave_read(void *opaque)
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
error_report("Received unexpected msg type: %d.", hdr.request);
|
||||||
|
- ret = -EINVAL;
|
||||||
|
+ ret = (uint64_t)-EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Close the remaining file descriptors. */
|
||||||
|
@@ -1515,7 +1517,7 @@ static void slave_read(void *opaque)
|
||||||
|
hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
|
||||||
|
hdr.flags |= VHOST_USER_REPLY_MASK;
|
||||||
|
|
||||||
|
- payload.u64 = !!ret;
|
||||||
|
+ payload.u64 = ret;
|
||||||
|
hdr.size = sizeof(payload.u64);
|
||||||
|
|
||||||
|
iovec[0].iov_base = &hdr;
|
||||||
|
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
|
||||||
|
index 8a6f8e2a7a..64ac6b6444 100644
|
||||||
|
--- a/include/hw/virtio/vhost-backend.h
|
||||||
|
+++ b/include/hw/virtio/vhost-backend.h
|
||||||
|
@@ -186,7 +186,7 @@ int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
|
||||||
|
int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
|
||||||
|
uint64_t iova, uint64_t len);
|
||||||
|
|
||||||
|
-int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
|
||||||
|
+uint64_t vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
|
||||||
|
struct vhost_iotlb_msg *imsg);
|
||||||
|
|
||||||
|
int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd);
|
||||||
|
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
index 0750687463..845cdb0177 100644
|
||||||
|
--- a/include/hw/virtio/vhost-user-fs.h
|
||||||
|
+++ b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
@@ -64,10 +64,13 @@ struct VHostUserFS {
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Callbacks from the vhost-user code for slave commands */
|
||||||
|
-int vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
- int fd);
|
||||||
|
-int vhost_user_fs_slave_unmap(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm);
|
||||||
|
-int vhost_user_fs_slave_sync(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm);
|
||||||
|
-int vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm, int fd);
|
||||||
|
+uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
+ int fd);
|
||||||
|
+uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev,
|
||||||
|
+ VhostUserFSSlaveMsg *sm);
|
||||||
|
+uint64_t vhost_user_fs_slave_sync(struct vhost_dev *dev,
|
||||||
|
+ VhostUserFSSlaveMsg *sm);
|
||||||
|
+uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev,
|
||||||
|
+ VhostUserFSSlaveMsg *sm, int fd);
|
||||||
|
|
||||||
|
#endif /* _QEMU_VHOST_USER_FS_H */
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,97 @@
|
|||||||
|
From 55b6372e1b893e77c6c4d5e87bd1a0765126399c Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Mon, 20 May 2019 20:02:29 +0100
|
||||||
|
Subject: [PATCH 23/29] DAX: libvhost-user: Route slave message payload
|
||||||
|
|
||||||
|
Route the uint64 payload from message replies on the slave back up
|
||||||
|
through vu_process_message_reply and to the callers.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
subprojects/libvhost-user/libvhost-user.c | 20 ++++++++++++++++----
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 2 ++
|
||||||
|
2 files changed, 18 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
index 9b8223b5d5..a1cbb626d2 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
@@ -403,9 +403,11 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
|
||||||
|
* Processes a reply on the slave channel.
|
||||||
|
* Entered with slave_mutex held and releases it before exit.
|
||||||
|
* Returns true on success.
|
||||||
|
+ * *payload is written on success
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
-vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
|
||||||
|
+vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg,
|
||||||
|
+ uint64_t *payload)
|
||||||
|
{
|
||||||
|
VhostUserMsg msg_reply;
|
||||||
|
bool result = false;
|
||||||
|
@@ -425,7 +427,8 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
- result = msg_reply.payload.u64 == 0;
|
||||||
|
+ *payload = msg_reply.payload.u64;
|
||||||
|
+ result = true;
|
||||||
|
|
||||||
|
out:
|
||||||
|
pthread_mutex_unlock(&dev->slave_mutex);
|
||||||
|
@@ -1312,6 +1315,8 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
|
||||||
|
{
|
||||||
|
int qidx = vq - dev->vq;
|
||||||
|
int fd_num = 0;
|
||||||
|
+ bool res;
|
||||||
|
+ uint64_t payload = 0;
|
||||||
|
VhostUserMsg vmsg = {
|
||||||
|
.request = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
|
||||||
|
.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
|
||||||
|
@@ -1342,7 +1347,10 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Also unlocks the slave_mutex */
|
||||||
|
- return vu_process_message_reply(dev, &vmsg);
|
||||||
|
+ res = vu_process_message_reply(dev, &vmsg, &payload);
|
||||||
|
+ res = res && (payload == 0);
|
||||||
|
+
|
||||||
|
+ return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
@@ -2915,6 +2923,8 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
VhostUserFSSlaveMsg *fsm)
|
||||||
|
{
|
||||||
|
int fd_num = 0;
|
||||||
|
+ bool res;
|
||||||
|
+ uint64_t payload = 0;
|
||||||
|
VhostUserMsg vmsg = {
|
||||||
|
.request = req,
|
||||||
|
.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
|
||||||
|
@@ -2939,6 +2949,8 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Also unlocks the slave_mutex */
|
||||||
|
- return vu_process_message_reply(dev, &vmsg);
|
||||||
|
+ res = vu_process_message_reply(dev, &vmsg, &payload);
|
||||||
|
+ res = res && (payload == 0);
|
||||||
|
+ return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index 1f4c7fff35..416d285844 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -419,6 +419,8 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
buf->buf[0].pos += msg.len[0];
|
||||||
|
in_sg_ptr++;
|
||||||
|
bad_in_num--;
|
||||||
|
+ } else if (req_res == 0) {
|
||||||
|
+ break;
|
||||||
|
} else {
|
||||||
|
ret = EIO;
|
||||||
|
free(in_sg_cpy);
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,240 @@
|
|||||||
|
From 5e0e90706b03fa71072b6b17779e0a66cb14aa64 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Tue, 21 May 2019 15:10:05 +0100
|
||||||
|
Subject: [PATCH 24/29] DAX: virtiofsd: Rework fs-cache-request error path
|
||||||
|
|
||||||
|
Rework error values all the way back to the guest for IO requests.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
hw/virtio/vhost-user-fs.c | 9 +++--
|
||||||
|
subprojects/libvhost-user/libvhost-user.c | 18 ++++++----
|
||||||
|
subprojects/libvhost-user/libvhost-user.h | 6 ++--
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 11 ++++---
|
||||||
|
tools/virtiofsd/fuse_virtio.c | 40 +++++++++++------------
|
||||||
|
5 files changed, 45 insertions(+), 39 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index c02dcaeca7..b43725824f 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -283,11 +283,10 @@ uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
trace_vhost_user_fs_slave_io_exit(res, done);
|
||||||
|
- /*
|
||||||
|
- * TODO! We should be returning 'done' if possible but our error handling
|
||||||
|
- * doesn't know about that yet.
|
||||||
|
- */
|
||||||
|
- return (uint64_t)res;
|
||||||
|
+ if (res < 0) {
|
||||||
|
+ return (uint64_t)res;
|
||||||
|
+ }
|
||||||
|
+ return (uint64_t)done;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
index a1cbb626d2..4cf4aef63d 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.c
|
||||||
|
@@ -2919,8 +2919,8 @@ vu_queue_push(VuDev *dev, VuVirtq *vq,
|
||||||
|
vu_queue_inflight_post_put(dev, vq, elem->index);
|
||||||
|
}
|
||||||
|
|
||||||
|
-bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
- VhostUserFSSlaveMsg *fsm)
|
||||||
|
+int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
+ VhostUserFSSlaveMsg *fsm)
|
||||||
|
{
|
||||||
|
int fd_num = 0;
|
||||||
|
bool res;
|
||||||
|
@@ -2939,18 +2939,24 @@ bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
vmsg.fd_num = fd_num;
|
||||||
|
|
||||||
|
if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) {
|
||||||
|
- return false;
|
||||||
|
+ return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&dev->slave_mutex);
|
||||||
|
if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
|
||||||
|
pthread_mutex_unlock(&dev->slave_mutex);
|
||||||
|
- return false;
|
||||||
|
+ return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Also unlocks the slave_mutex */
|
||||||
|
res = vu_process_message_reply(dev, &vmsg, &payload);
|
||||||
|
- res = res && (payload == 0);
|
||||||
|
- return res;
|
||||||
|
+ if (!res) {
|
||||||
|
+ return -EIO;
|
||||||
|
+ }
|
||||||
|
+ /*
|
||||||
|
+ * Payload is delivered as uint64_t but is actually signed for
|
||||||
|
+ * errors.
|
||||||
|
+ */
|
||||||
|
+ return (int64_t)payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
index 4b6e681a3e..ee75d4931f 100644
|
||||||
|
--- a/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
+++ b/subprojects/libvhost-user/libvhost-user.h
|
||||||
|
@@ -723,9 +723,9 @@ bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
|
||||||
|
* @fd: an fd (only required for map, else must be -1)
|
||||||
|
* @fsm: The body of the message
|
||||||
|
*
|
||||||
|
- * Returns: true if the reply was 0
|
||||||
|
+ * Returns: 0 or above for success, negative errno on error
|
||||||
|
*/
|
||||||
|
-bool vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
- VhostUserFSSlaveMsg *fsm);
|
||||||
|
+int64_t vu_fs_cache_request(VuDev *dev, VhostUserSlaveRequest req, int fd,
|
||||||
|
+ VhostUserFSSlaveMsg *fsm);
|
||||||
|
|
||||||
|
#endif /* LIBVHOST_USER_H */
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index e543f64177..a36a893871 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -1998,7 +1998,7 @@ int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf);
|
||||||
|
* @param fd The fd to map
|
||||||
|
* @return Zero on success
|
||||||
|
*/
|
||||||
|
-int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd);
|
||||||
|
+int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For use with virtio-fs; request unmapping of part of the cache
|
||||||
|
@@ -2007,7 +2007,7 @@ int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd);
|
||||||
|
* @param msg A set of unmapping requests
|
||||||
|
* @return Zero on success
|
||||||
|
*/
|
||||||
|
-int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg);
|
||||||
|
+int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For use with virtio-fs; request synchronisation of part of the cache
|
||||||
|
@@ -2017,7 +2017,7 @@ int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg);
|
||||||
|
* @param msg A set of syncing requests
|
||||||
|
* @return Zero on success
|
||||||
|
*/
|
||||||
|
-int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg);
|
||||||
|
+int64_t fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For use with virtio-fs; request IO directly to memory
|
||||||
|
@@ -2025,9 +2025,10 @@ int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg);
|
||||||
|
* @param se The current session
|
||||||
|
* @param msg A set of IO requests
|
||||||
|
* @param fd The fd to map
|
||||||
|
- * @return Zero on success
|
||||||
|
+ * @return Length on success, negative errno on error
|
||||||
|
*/
|
||||||
|
-int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd);
|
||||||
|
+int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg,
|
||||||
|
+ int fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For use with virtio-fs; wrapper for fuse_virtio_io for writes
|
||||||
|
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
index 416d285844..9577eaa68d 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_virtio.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_virtio.c
|
||||||
|
@@ -408,13 +408,13 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
if (len < msg.len[0]) {
|
||||||
|
msg.len[0] = len;
|
||||||
|
}
|
||||||
|
- bool req_res = !fuse_virtio_io(se, &msg, buf->buf[0].fd);
|
||||||
|
+ int64_t req_res = fuse_virtio_io(se, &msg, buf->buf[0].fd);
|
||||||
|
fuse_log(FUSE_LOG_DEBUG,
|
||||||
|
"%s: bad loop; len=%zd bad_in_num=%d fd_offset=%zd "
|
||||||
|
- "c_offset=%p req_res=%d\n",
|
||||||
|
+ "c_offset=%p req_res=%ld\n",
|
||||||
|
__func__, len, bad_in_num, buf->buf[0].pos,
|
||||||
|
in_sg_ptr[0].iov_base, req_res);
|
||||||
|
- if (req_res) {
|
||||||
|
+ if (req_res > 0) {
|
||||||
|
len -= msg.len[0];
|
||||||
|
buf->buf[0].pos += msg.len[0];
|
||||||
|
in_sg_ptr++;
|
||||||
|
@@ -422,7 +422,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
|
||||||
|
} else if (req_res == 0) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
- ret = EIO;
|
||||||
|
+ ret = req_res;
|
||||||
|
free(in_sg_cpy);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
@@ -1155,40 +1155,41 @@ void virtio_session_close(struct fuse_session *se)
|
||||||
|
se->virtio_dev = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd)
|
||||||
|
+int64_t fuse_virtio_map(fuse_req_t req, VhostUserFSSlaveMsg *msg, int fd)
|
||||||
|
{
|
||||||
|
if (!req->se->virtio_dev) {
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
- return !vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
- VHOST_USER_SLAVE_FS_MAP, fd, msg);
|
||||||
|
+ return vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
+ VHOST_USER_SLAVE_FS_MAP, fd, msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
-int fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg)
|
||||||
|
+int64_t fuse_virtio_unmap(struct fuse_session *se, VhostUserFSSlaveMsg *msg)
|
||||||
|
{
|
||||||
|
if (!se->virtio_dev) {
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
- return !vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP,
|
||||||
|
- -1, msg);
|
||||||
|
+ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_UNMAP,
|
||||||
|
+ -1, msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
-int fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg)
|
||||||
|
+int64_t fuse_virtio_sync(fuse_req_t req, VhostUserFSSlaveMsg *msg)
|
||||||
|
{
|
||||||
|
if (!req->se->virtio_dev) {
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
- return !vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
- VHOST_USER_SLAVE_FS_SYNC, -1, msg);
|
||||||
|
+ return vu_fs_cache_request(&req->se->virtio_dev->dev,
|
||||||
|
+ VHOST_USER_SLAVE_FS_SYNC, -1, msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
-int fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg, int fd)
|
||||||
|
+int64_t fuse_virtio_io(struct fuse_session *se, VhostUserFSSlaveMsg *msg,
|
||||||
|
+ int fd)
|
||||||
|
{
|
||||||
|
if (!se->virtio_dev) {
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
- return !vu_fs_cache_request(&se->virtio_dev->dev,
|
||||||
|
- VHOST_USER_SLAVE_FS_IO, fd, msg);
|
||||||
|
+ return vu_fs_cache_request(&se->virtio_dev->dev, VHOST_USER_SLAVE_FS_IO, fd,
|
||||||
|
+ msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -1214,8 +1215,7 @@ ssize_t fuse_virtio_write(fuse_req_t req, const struct fuse_buf *dst,
|
||||||
|
msg.len[0] = len;
|
||||||
|
msg.flags[0] = VHOST_USER_FS_FLAG_MAP_W;
|
||||||
|
|
||||||
|
- bool result = !fuse_virtio_io(req->se, &msg, dst->fd);
|
||||||
|
- /* TODO: Rework the result path to actually get length/error */
|
||||||
|
- fuse_log(FUSE_LOG_DEBUG, "%s: result=%d\n", __func__, result);
|
||||||
|
- return result ? len : -EIO;
|
||||||
|
+ int64_t result = fuse_virtio_io(req->se, &msg, dst->fd);
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG, "%s: result=%ld\n", __func__, result);
|
||||||
|
+ return result;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,76 @@
|
|||||||
|
From 0946e9a802943443333eb7e8c6a0989f37c236a5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Peng Tao <tao.peng@linux.alibaba.com>
|
||||||
|
Date: Mon, 3 Jun 2019 10:47:19 +0800
|
||||||
|
Subject: [PATCH 25/29] DAX: virtiofsd: make FUSE_REMOVEMAPPING support
|
||||||
|
multiple entries
|
||||||
|
|
||||||
|
The fuse wire protocol is changed so that we can unmap multiple
|
||||||
|
mappings in a single call.
|
||||||
|
|
||||||
|
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
|
||||||
|
fix by: Catherine Ho <catherine.hecx@gmail.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.c | 5 +++--
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++--------
|
||||||
|
2 files changed, 21 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
index a2480d4aa1..99ba000c2e 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
@@ -1920,12 +1920,13 @@ static void do_removemapping(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
struct fuse_removemapping_one *one;
|
||||||
|
|
||||||
|
arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
|
||||||
|
- if (!arg) {
|
||||||
|
+ if (!arg || arg->count <= 0) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR, "do_removemapping: invalid arg %p\n", arg);
|
||||||
|
fuse_reply_err(req, EINVAL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- one = fuse_mbuf_iter_advance(iter, sizeof(*one));
|
||||||
|
+ one = fuse_mbuf_iter_advance(iter, arg->count * sizeof(*one));
|
||||||
|
if (!one) {
|
||||||
|
fuse_log(
|
||||||
|
FUSE_LOG_ERR,
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index ab33fabcda..3af55ffb8a 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -2965,14 +2965,24 @@ static void lo_removemapping(fuse_req_t req, struct fuse_session *se,
|
||||||
|
VhostUserFSSlaveMsg msg = { 0 };
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
- msg.len[0] = argp->len;
|
||||||
|
- msg.c_offset[0] = argp->moffset;
|
||||||
|
- if (fuse_virtio_unmap(se, &msg)) {
|
||||||
|
- fprintf(stderr,
|
||||||
|
- "%s: unmap over virtio failed "
|
||||||
|
- "(offset=0x%lx, len=0x%lx)\n",
|
||||||
|
- __func__, argp->moffset, argp->len);
|
||||||
|
- ret = EINVAL;
|
||||||
|
+ for (int i = 0; num > 0; i++, argp++) {
|
||||||
|
+ msg.len[i] = argp->len;
|
||||||
|
+ msg.c_offset[i] = argp->moffset;
|
||||||
|
+
|
||||||
|
+ if (--num == 0 || i == VHOST_USER_FS_SLAVE_ENTRIES - 1) {
|
||||||
|
+ if (fuse_virtio_unmap(se, &msg)) {
|
||||||
|
+ fuse_log(FUSE_LOG_ERR,
|
||||||
|
+ "%s: unmap over virtio failed "
|
||||||
|
+ "(offset=0x%lx, len=0x%lx)\n",
|
||||||
|
+ __func__, argp->moffset, argp->len);
|
||||||
|
+ ret = EINVAL;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ if (num > 0) {
|
||||||
|
+ i = 0;
|
||||||
|
+ memset(&msg, 0, sizeof(msg));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
fuse_reply_err(req, ret);
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,42 @@
|
|||||||
|
From e684fffcaf21baf0f4341091303ce3c2dcbf822d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Fri, 26 Jul 2019 09:33:22 +0100
|
||||||
|
Subject: [PATCH 26/29] DAX: virtiofsd: implement FUSE_INIT map_alignment field
|
||||||
|
|
||||||
|
Communicate the host page size to the FUSE client so that
|
||||||
|
FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING requests are aware of our alignment
|
||||||
|
constraints.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_lowlevel.c | 7 +++++++
|
||||||
|
1 file changed, 7 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
index 99ba000c2e..d6256f571b 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
@@ -2188,6 +2188,12 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
outarg.max_background = se->conn.max_background;
|
||||||
|
outarg.congestion_threshold = se->conn.congestion_threshold;
|
||||||
|
outarg.time_gran = se->conn.time_gran;
|
||||||
|
+ if (arg->flags & FUSE_MAP_ALIGNMENT) {
|
||||||
|
+ outarg.flags |= FUSE_MAP_ALIGNMENT;
|
||||||
|
+
|
||||||
|
+ /* This constraint comes from mmap(2) and munmap(2) */
|
||||||
|
+ outarg.map_alignment = ffsl(sysconf(_SC_PAGE_SIZE)) - 1;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor);
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags);
|
||||||
|
@@ -2197,6 +2203,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n",
|
||||||
|
outarg.congestion_threshold);
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran);
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG, " map_alignment=%u\n", outarg.map_alignment);
|
||||||
|
|
||||||
|
send_reply_ok(req, &outarg, outargsize);
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,776 @@
|
|||||||
|
From a0cbb60bb58ffaf2ae771c7822f0cb25762076fa Mon Sep 17 00:00:00 2001
|
||||||
|
From: Miklos Szeredi <mszeredi@redhat.com>
|
||||||
|
Date: Wed, 20 Nov 2019 14:27:19 +0000
|
||||||
|
Subject: [PATCH 27/29] virtiofsd: add initial support for shared versions
|
||||||
|
|
||||||
|
Not backward compatible with previous kernels, so please only use with
|
||||||
|
a kernel that has version table support (this will need to be cleaned up).
|
||||||
|
|
||||||
|
No READDIRPLUS support in the kernel for versioned entries, so disable for
|
||||||
|
now.
|
||||||
|
|
||||||
|
Attribute timeout is set to "infinity", so changes to underlying filesystem
|
||||||
|
won't be visible. This also needs to be fixed, but is best for testing the
|
||||||
|
versioning since the shared version is the only thing that will force
|
||||||
|
refreshing metadata and dcache lookups.
|
||||||
|
|
||||||
|
No caching metadata modifications yet.
|
||||||
|
|
||||||
|
Start "ireg" daemon before starting any fuse servers.
|
||||||
|
|
||||||
|
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
|
||||||
|
Fix by:
|
||||||
|
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
|
||||||
|
Only send entryver_out when shared is enabled by:
|
||||||
|
With help message update from:
|
||||||
|
Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
---
|
||||||
|
include/standard-headers/linux/fuse.h | 5 +
|
||||||
|
tools/virtiofsd/fuse_lowlevel.c | 36 ++-
|
||||||
|
tools/virtiofsd/fuse_lowlevel.h | 9 +-
|
||||||
|
tools/virtiofsd/helper.c | 4 +
|
||||||
|
tools/virtiofsd/ireg.h | 33 +++
|
||||||
|
tools/virtiofsd/passthrough_ll.c | 321 +++++++++++++++++++++++++-
|
||||||
|
6 files changed, 387 insertions(+), 21 deletions(-)
|
||||||
|
create mode 100644 tools/virtiofsd/ireg.h
|
||||||
|
|
||||||
|
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
|
||||||
|
index 82c0a38b59..fbced7caef 100644
|
||||||
|
--- a/include/standard-headers/linux/fuse.h
|
||||||
|
+++ b/include/standard-headers/linux/fuse.h
|
||||||
|
@@ -510,6 +510,11 @@ struct fuse_entry_out {
|
||||||
|
struct fuse_attr attr;
|
||||||
|
};
|
||||||
|
|
||||||
|
+struct fuse_entryver_out {
|
||||||
|
+ uint64_t version_index;
|
||||||
|
+ int64_t initial_version;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
struct fuse_forget_in {
|
||||||
|
uint64_t nlookup;
|
||||||
|
};
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
index d6256f571b..47231378db 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.c
|
||||||
|
@@ -389,28 +389,46 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
|
||||||
|
+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e,
|
||||||
|
+ bool shared)
|
||||||
|
{
|
||||||
|
- struct fuse_entry_out arg;
|
||||||
|
- size_t size = sizeof(arg);
|
||||||
|
+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_entryver_out)];
|
||||||
|
+ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf;
|
||||||
|
+ struct fuse_entryver_out *ever =
|
||||||
|
+ (struct fuse_entryver_out *)(buf + sizeof(struct fuse_entry_out));
|
||||||
|
+ size_t size = sizeof(buf);
|
||||||
|
|
||||||
|
- memset(&arg, 0, sizeof(arg));
|
||||||
|
- fill_entry(&arg, e);
|
||||||
|
- return send_reply_ok(req, &arg, size);
|
||||||
|
+ if ((req->se->conn.proto_minor >= 9) && !shared) {
|
||||||
|
+ size -= sizeof(struct fuse_entryver_out);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ memset(buf, 0, sizeof(buf));
|
||||||
|
+ fill_entry(earg, e);
|
||||||
|
+ ever->initial_version = e->initial_version;
|
||||||
|
+ ever->version_index = e->version_offset;
|
||||||
|
+ return send_reply_ok(req, buf, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
|
||||||
|
- const struct fuse_file_info *f)
|
||||||
|
+ const struct fuse_file_info *f, bool shared)
|
||||||
|
{
|
||||||
|
- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)];
|
||||||
|
+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out) +
|
||||||
|
+ sizeof(struct fuse_entryver_out)];
|
||||||
|
size_t entrysize = sizeof(struct fuse_entry_out);
|
||||||
|
struct fuse_entry_out *earg = (struct fuse_entry_out *)buf;
|
||||||
|
struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize);
|
||||||
|
+ struct fuse_entryver_out *ever =
|
||||||
|
+ (struct fuse_entryver_out *)(buf + entrysize +
|
||||||
|
+ sizeof(struct fuse_open_out));
|
||||||
|
|
||||||
|
memset(buf, 0, sizeof(buf));
|
||||||
|
fill_entry(earg, e);
|
||||||
|
fill_open(oarg, f);
|
||||||
|
- return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out));
|
||||||
|
+ ever->initial_version = e->initial_version;
|
||||||
|
+ ever->version_index = e->version_offset;
|
||||||
|
+ return send_reply_ok(req, buf,
|
||||||
|
+ entrysize + sizeof(struct fuse_open_out) +
|
||||||
|
+ (shared ? sizeof(struct fuse_entryver_out) : 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
|
||||||
|
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
index a36a893871..5f60e3fd2c 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_lowlevel.h
|
||||||
|
@@ -26,6 +26,7 @@
|
||||||
|
#include "fuse_common.h"
|
||||||
|
#include "standard-headers/linux/fuse.h"
|
||||||
|
|
||||||
|
+#include <stdbool.h>
|
||||||
|
#include <sys/statvfs.h>
|
||||||
|
#include <sys/uio.h>
|
||||||
|
#include <utime.h>
|
||||||
|
@@ -104,6 +105,9 @@ struct fuse_entry_param {
|
||||||
|
* Flags for fuse_attr.flags that do not fit into attr.
|
||||||
|
*/
|
||||||
|
uint32_t attr_flags;
|
||||||
|
+
|
||||||
|
+ uint64_t version_offset;
|
||||||
|
+ int64_t initial_version;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
@@ -1294,7 +1298,8 @@ void fuse_reply_none(fuse_req_t req);
|
||||||
|
* @param e the entry parameters
|
||||||
|
* @return zero for success, -errno for failure to send reply
|
||||||
|
*/
|
||||||
|
-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e);
|
||||||
|
+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e,
|
||||||
|
+ bool shared);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reply with a directory entry and open parameters
|
||||||
|
@@ -1314,7 +1319,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e);
|
||||||
|
* @return zero for success, -errno for failure to send reply
|
||||||
|
*/
|
||||||
|
int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
|
||||||
|
- const struct fuse_file_info *fi);
|
||||||
|
+ const struct fuse_file_info *fi, bool shared);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reply with attributes
|
||||||
|
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
|
||||||
|
index 28243b51b2..29331ec2fc 100644
|
||||||
|
--- a/tools/virtiofsd/helper.c
|
||||||
|
+++ b/tools/virtiofsd/helper.c
|
||||||
|
@@ -174,6 +174,10 @@ void fuse_cmdline_help(void)
|
||||||
|
" default: no_xattr\n"
|
||||||
|
" -o modcaps=CAPLIST Modify the list of capabilities\n"
|
||||||
|
" e.g. -o modcaps=+sys_admin:-chown\n"
|
||||||
|
+ " -o shared|no_shared enable/disable shared cache\n"
|
||||||
|
+ " default: no_shared\n"
|
||||||
|
+ " please start 'ireg' daemon before "
|
||||||
|
+ " using shared cache\n"
|
||||||
|
" --rlimit-nofile=<num> set maximum number of file descriptors\n"
|
||||||
|
" (0 leaves rlimit unchanged)\n"
|
||||||
|
" default: min(1000000, fs.file-max - 16384)\n"
|
||||||
|
diff --git a/tools/virtiofsd/ireg.h b/tools/virtiofsd/ireg.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..91c0f386d7
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tools/virtiofsd/ireg.h
|
||||||
|
@@ -0,0 +1,33 @@
|
||||||
|
+/*
+ * Message formats exchanged with the 'ireg' daemon over lo->ireg_sock
+ * when the shared version table ("-o shared") is in use.
+ */
+#ifndef VIRTIOFSD_IREG_H
+#define VIRTIOFSD_IREG_H
+
+#include <stdint.h>
+
+/*
+ * NOTE(review): not referenced by the virtiofsd code in this patch;
+ * presumably written at the start of the version table file -- confirm
+ * against the ireg daemon.  Stored little-endian the bytes read "ShredVer".
+ */
+#define VERSION_TABLE_MAGIC 0x7265566465726853
+
+/* Requests sent by virtiofsd. */
+enum ireg_op {
+    IREG_GET, /* ask for the version-table slot of an (ino, dev) pair */
+    IREG_PUT, /* hand back a refid obtained from an earlier SRV_VERSION */
+};
+
+struct ireg_msg {
+    enum ireg_op op;
+    /*
+     * virtiofsd stores a pointer to its per-request state here and reads
+     * it back from srv_msg.handle to find the waiting caller.
+     */
+    uint64_t handle;
+    union {
+        struct {
+            uint64_t ino;
+            uint64_t dev;
+        } get;
+        struct {
+            uint64_t refid;
+        } put;
+    };
+};
+
+/* Replies received by virtiofsd. */
+enum srv_op {
+    SRV_VERSION, /* the only reply accepted for IREG_GET */
+};
+
+struct srv_msg {
+    enum srv_op op;
+    uint64_t handle; /* handle of the request being answered */
+    struct {
+        uint64_t refid;  /* kept per inode and sent back in IREG_PUT */
+        uint64_t offset; /* index into the version table; 0 means "no slot" */
+    } version;
+};
+
+#endif /* VIRTIOFSD_IREG_H */
|
||||||
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
index 3af55ffb8a..52a52b2dd7 100644
|
||||||
|
--- a/tools/virtiofsd/passthrough_ll.c
|
||||||
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
||||||
|
@@ -44,16 +44,21 @@
|
||||||
|
#include <cap-ng.h>
|
||||||
|
#include <dirent.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
+#include <semaphore.h>
|
||||||
|
#include <sys/file.h>
|
||||||
|
+#include <sys/mman.h>
|
||||||
|
#include <sys/mount.h>
|
||||||
|
#include <sys/prctl.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
+#include <sys/socket.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
+#include <sys/un.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <sys/xattr.h>
|
||||||
|
#include <syslog.h>
|
||||||
|
|
||||||
|
#include "qemu/cutils.h"
|
||||||
|
+#include "ireg.h"
|
||||||
|
#include "passthrough_helpers.h"
|
||||||
|
#include "passthrough_seccomp.h"
|
||||||
|
|
||||||
|
@@ -110,6 +115,8 @@ struct lo_inode {
|
||||||
|
*/
|
||||||
|
uint64_t nlookup;
|
||||||
|
|
||||||
|
+ uint64_t version_offset;
|
||||||
|
+ uint64_t ireg_refid;
|
||||||
|
fuse_ino_t fuse_ino;
|
||||||
|
pthread_mutex_t plock_mutex;
|
||||||
|
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
|
||||||
|
@@ -152,12 +159,16 @@ struct lo_data {
|
||||||
|
char *modcaps;
|
||||||
|
double timeout;
|
||||||
|
int cache;
|
||||||
|
+ int shared;
|
||||||
|
int timeout_set;
|
||||||
|
int readdirplus_set;
|
||||||
|
int readdirplus_clear;
|
||||||
|
int allow_direct_io;
|
||||||
|
int announce_submounts;
|
||||||
|
bool use_statx;
|
||||||
|
+ int ireg_sock;
|
||||||
|
+ int64_t *version_table;
|
||||||
|
+ uint64_t version_table_size;
|
||||||
|
struct lo_inode root;
|
||||||
|
GHashTable *inodes; /* protected by lo->mutex */
|
||||||
|
struct lo_map ino_map; /* protected by lo->mutex */
|
||||||
|
@@ -193,6 +204,8 @@ static const struct fuse_opt lo_opts[] = {
|
||||||
|
{ "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
|
||||||
|
{ "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
|
||||||
|
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
|
||||||
|
+ { "shared", offsetof(struct lo_data, shared), 1 },
|
||||||
|
+ { "no_shared", offsetof(struct lo_data, shared), 0 },
|
||||||
|
{ "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
|
||||||
|
{ "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
|
||||||
|
{ "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
|
||||||
|
@@ -204,6 +217,7 @@ static bool use_syslog = false;
|
||||||
|
static int current_log_level;
|
||||||
|
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
|
||||||
|
uint64_t n);
|
||||||
|
+static void put_shared(struct lo_data *lo, struct lo_inode *inode);
|
||||||
|
|
||||||
|
static struct {
|
||||||
|
pthread_mutex_t mutex;
|
||||||
|
@@ -512,6 +526,7 @@ static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
|
||||||
|
|
||||||
|
if (g_atomic_int_dec_and_test(&inode->refcount)) {
|
||||||
|
close(inode->fd);
|
||||||
|
+ put_shared(lo, inode);
|
||||||
|
free(inode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -587,8 +602,9 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /* TODO: shared version support for readdirplus */
|
||||||
|
if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
|
||||||
|
- lo->readdirplus_clear) {
|
||||||
|
+ lo->readdirplus_clear || lo->shared) {
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
|
||||||
|
conn->want &= ~FUSE_CAP_READDIRPLUS;
|
||||||
|
}
|
||||||
|
@@ -600,6 +616,29 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Address of the shared version-table entry selected by @inode's offset. */
+static int64_t *version_ptr(struct lo_data *lo, struct lo_inode *inode)
+{
+    int64_t *table = lo->version_table;
+
+    return &table[inode->version_offset];
+}
|
||||||
|
+
|
||||||
|
+/*
+ * Read the current version counter of @inode from the shared table.
+ *
+ * A version_offset of 0 means the inode has no slot (get_shared() sets it
+ * to 0 when there is no ireg connection); 0 is returned in that case.
+ */
+static int64_t get_version(struct lo_data *lo, struct lo_inode *inode)
+{
+    if (!inode->version_offset) {
+        return 0;
+    }
+
+    /*
+     * Use the generic builtin, as update_version() does; the size-suffixed
+     * __atomic_load_8 is not among GCC's documented __atomic builtins.
+     */
+    return __atomic_load_n(version_ptr(lo, inode), __ATOMIC_SEQ_CST);
+}
|
||||||
|
+
|
||||||
|
+/* Atomically bump @inode's shared version counter, if it has a slot. */
+static void update_version(struct lo_data *lo, struct lo_inode *inode)
+{
+    if (inode->version_offset) {
+        __atomic_add_fetch(version_ptr(lo, inode), 1, __ATOMIC_SEQ_CST);
+    }
+}
|
||||||
|
+
|
||||||
|
static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
|
||||||
|
struct fuse_file_info *fi)
|
||||||
|
{
|
||||||
|
@@ -731,6 +770,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
lo_inode_put(lo, &inode);
|
||||||
|
|
||||||
|
return lo_getattr(req, ino, fi);
|
||||||
|
@@ -763,6 +803,74 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
+ * Rendezvous between get_shared() and the ireg reader thread (ireg_do()):
+ * get_shared() sends the address of one of these as the message handle and
+ * sleeps on @ready; ireg_do() stores the reply into @inode and posts @ready.
+ */
+struct msgreply {
+    struct lo_inode *inode;
+    sem_t ready;
+};
+
+/*
+ * Ask the ireg daemon for @inode's slot in the shared version table.
+ *
+ * Sends IREG_GET keyed by the inode's (ino, dev) and blocks until ireg_do()
+ * has filled in inode->version_offset and inode->ireg_refid.  If there is
+ * no ireg connection, or the request cannot be sent, version_offset is set
+ * to 0 so the inode is treated as having no slot.
+ */
+static void get_shared(struct lo_data *lo, struct lo_inode *inode)
+{
+    ssize_t res;
+    struct msgreply rep = {
+        .inode = inode,
+    };
+    struct ireg_msg msg = {
+        .op = IREG_GET,
+        .handle = (uintptr_t) &rep,
+        .get = {
+            .ino = inode->key.ino,
+            .dev = inode->key.dev,
+        },
+    };
+
+    if (lo->ireg_sock == -1) {
+        inode->version_offset = 0;
+        return;
+    }
+
+    sem_init(&rep.ready, 0, 0);
+
+    res = write(lo->ireg_sock, &msg, sizeof(msg));
+    if (res != (ssize_t)sizeof(msg)) {
+        if (res == -1) {
+            fuse_log(FUSE_LOG_WARNING,
+                     "write(lo->ireg_sock, {IREG_GET, ...}): %m\n");
+        } else {
+            fuse_log(FUSE_LOG_WARNING, "short write to ireg_sock: %zd\n", res);
+        }
+        /* Nothing to wait for: behave as in the no-connection case. */
+        inode->version_offset = 0;
+        sem_destroy(&rep.ready);
+        return;
+    }
+
+    /* sem_wait() may fail (e.g. EINTR); retry until ireg_do() has posted. */
+    while (sem_wait(&rep.ready)) {
+        ;
+    }
+    sem_destroy(&rep.ready);
+}
|
||||||
|
+
|
||||||
|
+/*
+ * Send IREG_PUT for @inode's ireg_refid.  No reply is awaited; a failed or
+ * short write is only logged.  Does nothing without an ireg connection.
+ */
+static void put_shared(struct lo_data *lo, struct lo_inode *inode)
+{
+    struct ireg_msg req = {
+        .op = IREG_PUT,
+        .put.refid = inode->ireg_refid,
+    };
+    int sent;
+
+    if (lo->ireg_sock == -1) {
+        return;
+    }
+
+    sent = write(lo->ireg_sock, &req, sizeof(req));
+    if (sent == sizeof(req)) {
+        return;
+    }
+
+    if (sent == -1) {
+        fuse_log(FUSE_LOG_WARNING,
+                 "write(lo->ireg_sock, {IREG_PUT, ...}): %m\n");
+    } else {
+        fuse_log(FUSE_LOG_WARNING, "short write to ireg_sock: %i\n", sent);
+    }
+}
|
||||||
|
+
|
||||||
|
/* value_destroy_func for posix_locks GHashTable */
|
||||||
|
static void posix_locks_value_destroy(gpointer data)
|
||||||
|
{
|
||||||
|
@@ -908,16 +1016,30 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||||
|
g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&lo->mutex);
|
||||||
|
+ get_shared(lo, inode);
|
||||||
|
inode->fuse_ino = lo_add_inode_mapping(req, inode);
|
||||||
|
g_hash_table_insert(lo->inodes, &inode->key, inode);
|
||||||
|
pthread_mutex_unlock(&lo->mutex);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ e->initial_version = get_version(lo, inode);
|
||||||
|
+ res = fstatat(inode->fd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
|
||||||
|
+ if (res == -1) {
|
||||||
|
+ saverr = errno;
|
||||||
|
+ unref_inode_lolocked(lo, inode, 1);
|
||||||
|
+ errno = saverr;
|
||||||
|
+ goto out_err;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
e->ino = inode->fuse_ino;
|
||||||
|
+ e->version_offset = inode->version_offset;
|
||||||
|
lo_inode_put(lo, &inode);
|
||||||
|
lo_inode_put(lo, &dir);
|
||||||
|
|
||||||
|
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
|
||||||
|
- name, (unsigned long long)e->ino);
|
||||||
|
+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli (version_table[%lli]=%lli)\n",
|
||||||
|
+ (unsigned long long)parent, name, (unsigned long long)e->ino,
|
||||||
|
+ (unsigned long long)e->version_offset,
|
||||||
|
+ (unsigned long long)e->initial_version);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
@@ -952,7 +1074,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
|
||||||
|
if (err) {
|
||||||
|
fuse_reply_err(req, err);
|
||||||
|
} else {
|
||||||
|
- fuse_reply_entry(req, &e);
|
||||||
|
+ fuse_reply_entry(req, &e, lo_data(req)->shared);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1056,6 +1178,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ update_version(lo, dir);
|
||||||
|
+
|
||||||
|
saverr = lo_do_lookup(req, parent, name, &e);
|
||||||
|
if (saverr) {
|
||||||
|
goto out;
|
||||||
|
@@ -1064,7 +1188,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
|
||||||
|
name, (unsigned long long)e.ino);
|
||||||
|
|
||||||
|
- fuse_reply_entry(req, &e);
|
||||||
|
+ fuse_reply_entry(req, &e, lo->shared);
|
||||||
|
lo_inode_put(lo, &dir);
|
||||||
|
return;
|
||||||
|
|
||||||
|
@@ -1134,11 +1258,13 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
|
||||||
|
inode->nlookup++;
|
||||||
|
pthread_mutex_unlock(&lo->mutex);
|
||||||
|
e.ino = inode->fuse_ino;
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+ update_version(lo, parent_inode);
|
||||||
|
|
||||||
|
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
|
||||||
|
name, (unsigned long long)e.ino);
|
||||||
|
|
||||||
|
- fuse_reply_entry(req, &e);
|
||||||
|
+ fuse_reply_entry(req, &e, lo->shared);
|
||||||
|
lo_inode_put(lo, &parent_inode);
|
||||||
|
lo_inode_put(lo, &inode);
|
||||||
|
return;
|
||||||
|
@@ -1192,8 +1318,21 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
|
||||||
|
}
|
||||||
|
|
||||||
|
res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
|
||||||
|
+ if (res == -1) {
|
||||||
|
+ fuse_reply_err(req, errno);
|
||||||
|
+ } else {
|
||||||
|
+ struct lo_inode *parent_inode;
|
||||||
|
|
||||||
|
- fuse_reply_err(req, res == -1 ? errno : 0);
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+
|
||||||
|
+ parent_inode = lo_inode(req, parent);
|
||||||
|
+ if (parent_inode) {
|
||||||
|
+ update_version(lo, parent_inode);
|
||||||
|
+ lo_inode_put(lo, &parent_inode);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fuse_reply_err(req, 0);
|
||||||
|
+ }
|
||||||
|
unref_inode_lolocked(lo, inode, 1);
|
||||||
|
lo_inode_put(lo, &inode);
|
||||||
|
}
|
||||||
|
@@ -1245,8 +1384,18 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||||
|
}
|
||||||
|
|
||||||
|
res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
|
||||||
|
+ if (res == -1) {
|
||||||
|
+ fuse_reply_err(req, errno);
|
||||||
|
+ } else {
|
||||||
|
+ update_version(lo, oldinode);
|
||||||
|
+ if (newinode) {
|
||||||
|
+ update_version(lo, newinode);
|
||||||
|
+ }
|
||||||
|
+ update_version(lo, parent_inode);
|
||||||
|
+ update_version(lo, newparent_inode);
|
||||||
|
+ fuse_reply_err(req, 0);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- fuse_reply_err(req, res == -1 ? errno : 0);
|
||||||
|
out:
|
||||||
|
unref_inode_lolocked(lo, oldinode, 1);
|
||||||
|
unref_inode_lolocked(lo, newinode, 1);
|
||||||
|
@@ -1274,8 +1423,21 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
|
||||||
|
}
|
||||||
|
|
||||||
|
res = unlinkat(lo_fd(req, parent), name, 0);
|
||||||
|
+ if (res == -1) {
|
||||||
|
+ fuse_reply_err(req, errno);
|
||||||
|
+ } else {
|
||||||
|
+ struct lo_inode *parent_inode;
|
||||||
|
|
||||||
|
- fuse_reply_err(req, res == -1 ? errno : 0);
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+
|
||||||
|
+ parent_inode = lo_inode(req, parent);
|
||||||
|
+ if (parent_inode) {
|
||||||
|
+ update_version(lo, parent_inode);
|
||||||
|
+ lo_inode_put(lo, &parent_inode);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fuse_reply_err(req, 0);
|
||||||
|
+ }
|
||||||
|
unref_inode_lolocked(lo, inode, 1);
|
||||||
|
lo_inode_put(lo, &inode);
|
||||||
|
}
|
||||||
|
@@ -1690,6 +1852,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
|
||||||
|
if (!err) {
|
||||||
|
ssize_t fh;
|
||||||
|
|
||||||
|
+ update_version(lo, parent_inode);
|
||||||
|
+
|
||||||
|
pthread_mutex_lock(&lo->mutex);
|
||||||
|
fh = lo_add_fd_mapping(req, fd);
|
||||||
|
pthread_mutex_unlock(&lo->mutex);
|
||||||
|
@@ -1714,7 +1878,7 @@ out:
|
||||||
|
if (err) {
|
||||||
|
fuse_reply_err(req, err);
|
||||||
|
} else {
|
||||||
|
- fuse_reply_create(req, &e, fi);
|
||||||
|
+ fuse_reply_create(req, &e, fi, lo->shared);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2041,6 +2205,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
|
||||||
|
(void)ino;
|
||||||
|
ssize_t res;
|
||||||
|
struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
|
||||||
|
+ struct lo_data *lo = lo_data(req);
|
||||||
|
bool cap_fsetid_dropped = false;
|
||||||
|
|
||||||
|
out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
|
||||||
|
@@ -2067,6 +2232,14 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
|
||||||
|
if (res < 0) {
|
||||||
|
fuse_reply_err(req, -res);
|
||||||
|
} else {
|
||||||
|
+ struct lo_inode *inode;
|
||||||
|
+
|
||||||
|
+ inode = lo_inode(req, ino);
|
||||||
|
+ if (inode) {
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+ lo_inode_put(lo, &inode);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
fuse_reply_write(req, (size_t)res);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2095,6 +2268,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
|
||||||
|
off_t length, struct fuse_file_info *fi)
|
||||||
|
{
|
||||||
|
int err = EOPNOTSUPP;
|
||||||
|
+ struct lo_data *lo = lo_data(req);
|
||||||
|
(void)ino;
|
||||||
|
|
||||||
|
#ifdef CONFIG_FALLOCATE
|
||||||
|
@@ -2112,6 +2286,16 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
|
||||||
|
err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+ if (!err) {
|
||||||
|
+ struct lo_inode *inode;
|
||||||
|
+
|
||||||
|
+ inode = lo_inode(req, ino);
|
||||||
|
+ if (inode) {
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+ lo_inode_put(lo, &inode);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
fuse_reply_err(req, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2754,6 +2938,9 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
|
||||||
|
|
||||||
|
saverr = ret == -1 ? errno : 0;
|
||||||
|
|
||||||
|
+ if (!saverr) {
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+ }
|
||||||
|
out:
|
||||||
|
if (fd >= 0) {
|
||||||
|
close(fd);
|
||||||
|
@@ -2820,6 +3007,9 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name)
|
||||||
|
|
||||||
|
saverr = ret == -1 ? errno : 0;
|
||||||
|
|
||||||
|
+ if (!saverr) {
|
||||||
|
+ update_version(lo, inode);
|
||||||
|
+ }
|
||||||
|
out:
|
||||||
|
if (fd >= 0) {
|
||||||
|
close(fd);
|
||||||
|
@@ -3474,6 +3664,101 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
+ * Thread body: receive replies on lo->ireg_sock and wake the get_shared()
+ * caller each one belongs to.  Messages of the wrong size or with an
+ * unexpected op are logged and dropped.  The thread ends on read error or
+ * when the peer disconnects.
+ */
+static void *ireg_do(void *data)
+{
+    struct lo_data *lo = data;
+    char buf[100];
+    int len;
+
+    while ((len = read(lo->ireg_sock, buf, sizeof(buf))) > 0) {
+        struct srv_msg reply;
+        struct msgreply *waiter;
+
+        if (len != sizeof(reply)) {
+            fuse_log(FUSE_LOG_WARNING, "bad size message: %i\n", len);
+            continue;
+        }
+
+        memcpy(&reply, buf, sizeof(reply));
+        if (reply.op != SRV_VERSION) {
+            fuse_log(FUSE_LOG_WARNING, "bad reply to IREG_GET: %i\n", reply.op);
+            continue;
+        }
+
+        /* The handle is the msgreply address get_shared() put in its request */
+        waiter = (struct msgreply *)(uintptr_t)reply.handle;
+        waiter->inode->version_offset = reply.version.offset;
+        waiter->inode->ireg_refid = reply.version.refid;
+        sem_post(&waiter->ready);
+    }
+
+    if (len == -1) {
+        fuse_log(FUSE_LOG_WARNING, "read(lo->ireg_sock, ...): %m\n");
+    } else {
+        fuse_log(FUSE_LOG_WARNING, "disconnected from ireg\n");
+    }
+    return NULL;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * Connect to the ireg daemon and map the shared version table.
+ *
+ * Does nothing beyond setting lo->ireg_sock to -1 unless "-o shared" was
+ * given.  If the daemon cannot be reached a warning is logged and
+ * lo->ireg_sock stays -1, so get_shared()/put_shared() never talk to it.
+ * After a successful connect, failure to open, stat or map the version
+ * table is fatal.
+ */
+static void setup_shared_versions(struct lo_data *lo)
+{
+    int fd, sock, res;
+    const char *version_path = "/dev/shm/fuse_shared_versions";
+    struct stat st;
+    struct sockaddr_un name = { .sun_family = AF_UNIX };
+    const char *socket_name = "/tmp/ireg.sock";
+    void *addr;
+
+    lo->ireg_sock = -1;
+    if (!lo->shared) {
+        return;
+    }
+
+    sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+    if (sock == -1) {
+        fuse_log(FUSE_LOG_ERR, "socket(AF_UNIX, SOCK_SEQPACKET, 0): %m\n");
+        exit(1);
+    }
+
+    /* name is zero-initialised, so copying size - 1 bytes keeps it terminated */
+    strncpy(name.sun_path, socket_name, sizeof(name.sun_path) - 1);
+
+    res = connect(sock, (const struct sockaddr *)&name,
+                  sizeof(struct sockaddr_un));
+    if (res == -1) {
+        fuse_log(FUSE_LOG_WARNING, "connect to ireg: %m\n");
+        close(sock);
+        lo->ireg_sock = -1;
+        return;
+    }
+
+    lo->ireg_sock = sock;
+
+    fd = open(version_path, O_RDWR);
+    /* Check the descriptor just opened, not the already-validated socket. */
+    if (fd == -1) {
+        fuse_log(FUSE_LOG_ERR, "open(%s, O_RDWR): %m\n", version_path);
+        exit(1);
+    }
+
+    res = fstat(fd, &st);
+    if (res == -1) {
+        fuse_log(FUSE_LOG_ERR, "fstat(%i, &stat): %m\n", fd);
+        exit(1);
+    }
+
+    lo->version_table_size = st.st_size / sizeof(lo->version_table[0]);
+
+    addr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (addr == MAP_FAILED) {
+        fuse_log(
+            FUSE_LOG_ERR,
+            "mmap(NULL, %li, PROT_READ | PROT_WRITE, MAP_SHARED, %i, 0): %m\n",
+            (long)st.st_size, fd);
+        exit(1);
+    }
+
+    /* The mapping remains valid after the descriptor is closed. */
+    close(fd);
+
+    lo->version_table = addr;
+}
|
||||||
|
+
|
||||||
|
static void setup_root(struct lo_data *lo, struct lo_inode *root)
|
||||||
|
{
|
||||||
|
int fd, res;
|
||||||
|
@@ -3688,6 +3973,7 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
|
lo.use_statx = true;
|
||||||
|
|
||||||
|
+ setup_shared_versions(&lo);
|
||||||
|
se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
|
||||||
|
if (se == NULL) {
|
||||||
|
goto err_out1;
|
||||||
|
@@ -3711,9 +3997,24 @@ int main(int argc, char *argv[])
|
||||||
|
setup_sandbox(&lo, se, opts.syslog);
|
||||||
|
|
||||||
|
setup_root(&lo, &lo.root);
|
||||||
|
+
|
||||||
|
+ if (lo.ireg_sock != -1) {
|
||||||
|
+ pthread_t ireg_thread;
|
||||||
|
+
|
||||||
|
+ ret = pthread_create(&ireg_thread, NULL, ireg_do, &lo);
|
||||||
|
+ if (ret) {
|
||||||
|
+ fuse_log(FUSE_LOG_WARNING, "pthread_create: %s\n", strerror(ret));
|
||||||
|
+ ret = 1;
|
||||||
|
+ goto err_out4;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ get_shared(&lo, &lo.root);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Block until ctrl+c or fusermount -u */
|
||||||
|
ret = virtio_loop(se);
|
||||||
|
|
||||||
|
+err_out4:
|
||||||
|
fuse_session_unmount(se);
|
||||||
|
cleanup_capng();
|
||||||
|
err_out3:
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,167 @@
|
|||||||
|
From 119990ab3a30564c7e44f4e39344be48fc998f26 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Fri, 27 Jul 2018 10:36:41 +0100
|
||||||
|
Subject: [PATCH 28/29] virtio-fs: Allow mapping of meta data version table
|
||||||
|
|
||||||
|
The 'meta data version table' is a block of shared memory mapped between
|
||||||
|
multiple QEMUs and fuse daemons, so that they can be informed
|
||||||
|
of metadata updates. It's typically a shmfs file, and
|
||||||
|
it's specified as:
|
||||||
|
|
||||||
|
-device vhost-user-fs-pci,chardev=char0,tag=myfs,cache-size=1G,versiontable=/dev/shm/mdvt1
|
||||||
|
|
||||||
|
It gets mapped into the PCI bar after the data cache; it's read only.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
hw/virtio/vhost-user-fs-pci.c | 16 +++++++++--
|
||||||
|
hw/virtio/vhost-user-fs.c | 32 ++++++++++++++++++++++
|
||||||
|
include/hw/virtio/vhost-user-fs.h | 4 +++
|
||||||
|
include/standard-headers/linux/virtio_fs.h | 1 +
|
||||||
|
4 files changed, 51 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c
|
||||||
|
index 19aaa8d722..aad0128fa5 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs-pci.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs-pci.c
|
||||||
|
@@ -42,6 +42,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
|
||||||
|
VHostUserFSPCI *dev = VHOST_USER_FS_PCI(vpci_dev);
|
||||||
|
DeviceState *vdev = DEVICE(&dev->vdev);
|
||||||
|
uint64_t cachesize;
|
||||||
|
+ uint64_t totalsize;
|
||||||
|
|
||||||
|
if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
|
||||||
|
/* Also reserve config change and hiprio queue vectors */
|
||||||
|
@@ -51,18 +52,29 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
|
||||||
|
qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
|
||||||
|
cachesize = dev->vdev.conf.cache_size;
|
||||||
|
|
||||||
|
+ /* PCIe bar needs to be a power of 2 */
|
||||||
|
+ totalsize = pow2ceil(cachesize + dev->vdev.mdvt_size);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* The bar starts with the data/DAX cache
|
||||||
|
- * Others will be added later.
|
||||||
|
+ * followed by the metadata cache.
|
||||||
|
*/
|
||||||
|
memory_region_init(&dev->cachebar, OBJECT(vpci_dev),
|
||||||
|
- "vhost-fs-pci-cachebar", cachesize);
|
||||||
|
+ "vhost-fs-pci-cachebar", totalsize);
|
||||||
|
if (cachesize) {
|
||||||
|
memory_region_add_subregion(&dev->cachebar, 0, &dev->vdev.cache);
|
||||||
|
virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR, 0, cachesize,
|
||||||
|
VIRTIO_FS_SHMCAP_ID_CACHE);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (dev->vdev.mdvt_size) {
|
||||||
|
+ memory_region_add_subregion(&dev->cachebar, cachesize,
|
||||||
|
+ &dev->vdev.mdvt);
|
||||||
|
+ virtio_pci_add_shm_cap(vpci_dev, VIRTIO_FS_PCI_CACHE_BAR,
|
||||||
|
+ cachesize, dev->vdev.mdvt_size,
|
||||||
|
+ VIRTIO_FS_SHMCAP_ID_VERTAB);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* After 'realized' so the memory region exists */
|
||||||
|
pci_register_bar(&vpci_dev->pci_dev, VIRTIO_FS_PCI_CACHE_BAR,
|
||||||
|
PCI_BASE_ADDRESS_SPACE_MEMORY |
|
||||||
|
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
|
||||||
|
index b43725824f..fb16db7e0d 100644
|
||||||
|
--- a/hw/virtio/vhost-user-fs.c
|
||||||
|
+++ b/hw/virtio/vhost-user-fs.c
|
||||||
|
@@ -432,6 +432,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
unsigned int i;
|
||||||
|
size_t len;
|
||||||
|
int ret;
|
||||||
|
+ int mdvtfd = -1;
|
||||||
|
|
||||||
|
if (!fs->conf.chardev.chr) {
|
||||||
|
error_setg(errp, "missing chardev");
|
||||||
|
@@ -475,6 +476,28 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
"no smaller than the page size");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
+ if (fs->conf.mdvtpath) {
|
||||||
|
+ struct stat statbuf;
|
||||||
|
+
|
||||||
|
+ mdvtfd = open(fs->conf.mdvtpath, O_RDWR);
|
||||||
|
+ if (mdvtfd < 0) {
|
||||||
|
+ error_setg_errno(errp, errno,
|
||||||
|
+ "Failed to open meta-data version table '%s'",
|
||||||
|
+ fs->conf.mdvtpath);
|
||||||
|
+
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ if (fstat(mdvtfd, &statbuf) == -1) {
|
||||||
|
+ error_setg_errno(errp, errno,
|
||||||
|
+ "Failed to stat meta-data version table '%s'",
|
||||||
|
+ fs->conf.mdvtpath);
|
||||||
|
+ close(mdvtfd);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fs->mdvt_size = statbuf.st_size;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (fs->conf.cache_size) {
|
||||||
|
/* Anonymous, private memory is not counted as overcommit */
|
||||||
|
cache_ptr = mmap(NULL, fs->conf.cache_size, DAX_WINDOW_PROT,
|
||||||
|
@@ -489,6 +512,14 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
fs->conf.cache_size, cache_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
+    /*
+     * mdvtfd stays -1 unless a "versiontable" path was opened above, so
+     * test for a valid descriptor; a plain truth test also accepts -1.
+     */
+    if (mdvtfd >= 0) {
+        memory_region_init_ram_from_fd(&fs->mdvt, OBJECT(vdev),
+                                       "virtio-fs-mdvt",
+                                       fs->mdvt_size, true, mdvtfd, NULL);
+        /* The version table is read-only by the guest */
+        memory_region_set_readonly(&fs->mdvt, true);
+    }
|
||||||
|
+
|
||||||
|
if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -564,6 +595,7 @@ static Property vuf_properties[] = {
|
||||||
|
conf.num_request_queues, 1),
|
||||||
|
DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
|
||||||
|
DEFINE_PROP_SIZE("cache-size", VHostUserFS, conf.cache_size, 0),
|
||||||
|
+ DEFINE_PROP_STRING("versiontable", VHostUserFS, conf.mdvtpath),
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
index 845cdb0177..83015ac0fc 100644
|
||||||
|
--- a/include/hw/virtio/vhost-user-fs.h
|
||||||
|
+++ b/include/hw/virtio/vhost-user-fs.h
|
||||||
|
@@ -47,6 +47,7 @@ typedef struct {
|
||||||
|
uint16_t num_request_queues;
|
||||||
|
uint16_t queue_size;
|
||||||
|
uint64_t cache_size;
|
||||||
|
+ char *mdvtpath;
|
||||||
|
} VHostUserFSConf;
|
||||||
|
|
||||||
|
struct VHostUserFS {
|
||||||
|
@@ -61,6 +62,9 @@ struct VHostUserFS {
|
||||||
|
|
||||||
|
/*< public >*/
|
||||||
|
MemoryRegion cache;
|
||||||
|
+ /* Metadata version table */
|
||||||
|
+ size_t mdvt_size;
|
||||||
|
+ MemoryRegion mdvt;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Callbacks from the vhost-user code for slave commands */
|
||||||
|
diff --git a/include/standard-headers/linux/virtio_fs.h b/include/standard-headers/linux/virtio_fs.h
|
||||||
|
index 808aa3a402..a17b5172a8 100644
|
||||||
|
--- a/include/standard-headers/linux/virtio_fs.h
|
||||||
|
+++ b/include/standard-headers/linux/virtio_fs.h
|
||||||
|
@@ -18,6 +18,7 @@ struct virtio_fs_config {
|
||||||
|
|
||||||
|
/* For the id field in virtio_pci_shm_cap */
|
||||||
|
#define VIRTIO_FS_SHMCAP_ID_CACHE 0
|
||||||
|
+#define VIRTIO_FS_SHMCAP_ID_VERTAB 1
|
||||||
|
|
||||||
|
#define VIRTIO_FS_PCI_CACHE_BAR 2
|
||||||
|
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -0,0 +1,35 @@
|
|||||||
|
From e2a3c273639368221dae39a7f230a46d0a580e4d Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||||
|
Date: Tue, 21 Jan 2020 10:20:14 +0000
|
||||||
|
Subject: [PATCH 29/29] virtiofsd: Add printf checking to fuse_log
|
||||||
|
|
||||||
|
Use qemu's GCC_FMT_ATTR to add printf style checking to fuse_log.
|
||||||
|
|
||||||
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||||
|
---
|
||||||
|
tools/virtiofsd/fuse_log.h | 2 ++
|
||||||
|
1 file changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h
|
||||||
|
index 8d7091bd4d..5c2df71603 100644
|
||||||
|
--- a/tools/virtiofsd/fuse_log.h
|
||||||
|
+++ b/tools/virtiofsd/fuse_log.h
|
||||||
|
@@ -14,6 +14,7 @@
|
||||||
|
* This file defines the logging interface of FUSE
|
||||||
|
*/
|
||||||
|
|
||||||
|
+#include "qemu/compiler.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log severity level
|
||||||
|
@@ -68,6 +69,7 @@ void fuse_set_log_func(fuse_log_func_t func);
|
||||||
|
* @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc)
|
||||||
|
* @param fmt sprintf-style format string including newline
|
||||||
|
*/
|
||||||
|
+GCC_FMT_ATTR(2,3)
|
||||||
|
void fuse_log(enum fuse_log_level level, const char *fmt, ...);
|
||||||
|
|
||||||
|
#endif /* FUSE_LOG_H_ */
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
@ -107,8 +107,8 @@ assets:
|
|||||||
|
|
||||||
qemu-experimental:
|
qemu-experimental:
|
||||||
description: "QEMU with virtiofs support"
|
description: "QEMU with virtiofs support"
|
||||||
url: "https://gitlab.com/virtio-fs/qemu"
|
url: "https://github.com/qemu/qemu"
|
||||||
version: "qemu5.0-virtiofs-with51bits-dax"
|
version: "470dd6bd360782f5137f7e3376af6a44658eb1d3"
|
||||||
|
|
||||||
image:
|
image:
|
||||||
description: |
|
description: |
|
||||||
|
Loading…
Reference in New Issue
Block a user