diff --git a/kernel/patches/4.19.x/0001-fuse-add-skeleton-virtio_fs.ko-module.patch b/kernel/patches/4.19.x/0001-fuse-add-skeleton-virtio_fs.ko-module.patch deleted file mode 100644 index f218c9865c..0000000000 --- a/kernel/patches/4.19.x/0001-fuse-add-skeleton-virtio_fs.ko-module.patch +++ /dev/null @@ -1,4604 +0,0 @@ -From 9a821958eb0b586b526af5490c811f28ec062d94 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 12 Jun 2018 09:41:17 +0100 -Subject: [PATCH] fuse: add skeleton virtio_fs.ko module - -Add a basic file system module for virtio-fs. - -Signed-off-by: Stefan Hajnoczi - -fuse: add probe/remove virtio driver - -Add basic probe/remove functionality for the new virtio-fs device. - -Signed-off-by: Stefan Hajnoczi - -fuse: extract fuse_fill_super_common() - -fuse_fill_super() includes code to process the fd= option and link the -struct fuse_dev to the fd's struct file. In virtio-fs there is no file -descriptor because /dev/fuse is not used. - -This patch extracts fuse_fill_super_common() so that both classic fuse -and virtio-fs can share the code to initialize a mount. - -parse_fuse_opt() is also extracted so that the fuse_fill_super_common() -caller has access to the mount options. This allows classic fuse to -handle the fd= option outside fuse_fill_super_common(). - -Signed-off-by: Stefan Hajnoczi - -virtio_fs: get mount working - -Provide definitions of ->mount and ->kill_sb. This is still WIP. - -Signed-off-by: Stefan Hajnoczi - -fuse: export fuse_end_request() - -virtio-fs will need to complete requests from outside fs/fuse/dev.c. -Make the symbol visible. - -Signed-off-by: Stefan Hajnoczi - -fuse: export fuse_len_args() - -virtio-fs will need to query the length of fuse_arg lists. Make the -symbol visible. - -Signed-off-by: Stefan Hajnoczi - -fuse: Export fuse_send_init_request() - -This will be used by virtio-fs to send the init request to the fuse server -after initialization of the virtqueues. - -Signed-off-by: Vivek Goyal - -fuse: add fuse_iqueue_ops callbacks - -The /dev/fuse device uses fiq->waitq and fasync to signal that requests -are available. These mechanisms do not apply to virtio-fs. This patch -introduces callbacks so alternative behavior can be used. - -Note that queue_interrupt() changes along these lines: - - spin_lock(&fiq->waitq.lock); - wake_up_locked(&fiq->waitq); -+ kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - spin_unlock(&fiq->waitq.lock); -- kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - -Since queue_request() and queue_forget() also call kill_fasync() inside -the spinlock, this should be safe. - -Signed-off-by: Stefan Hajnoczi - -fuse: Separate fuse device allocation and installation in fuse_conn - -As of now, fuse_dev_alloc() both allocates a fuse device and installs it -in the fuse_conn list. fuse_dev_alloc() can fail if fuse_device allocation -fails. - -virtio-fs needs to initialize multiple fuse devices (one per virtio -queue). It initializes one fuse device as part of the call to -fuse_fill_super_common(), and the rest of the devices are allocated and -installed after that. - -But we can't afford to fail after calling fuse_fill_super_common(), as -we don't have a way to undo all the actions done by fuse_fill_super_common(). -So to avoid failures after the call to fuse_fill_super_common(), -pre-allocate all fuse devices early and install them into the fuse -connection later. - -This patch provides two separate helpers for fuse device allocation and -fuse device installation in fuse_conn.
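A minimal sketch of the allocate-then-install split this commit message describes. fuse_dev_alloc(), fuse_dev_install() and fuse_dev_free() follow the helpers the message names (signatures follow the upstream form and may differ slightly in this patch); struct virtio_fs_sketch and its fields are illustrative placeholders, not the patch's actual types.

struct virtio_fs_vq_sketch {
        struct fuse_dev *fud;   /* pre-allocated, installed later */
};

struct virtio_fs_sketch {
        unsigned int nvqs;
        struct virtio_fs_vq_sketch *vqs;
};

/* Allocation phase: may fail, and is easy to unwind because nothing
 * has been published to the fuse_conn yet. Runs before
 * fuse_fill_super_common(). */
static int virtio_fs_alloc_devs_sketch(struct virtio_fs_sketch *fs)
{
        unsigned int i;

        for (i = 0; i < fs->nvqs; i++) {
                fs->vqs[i].fud = fuse_dev_alloc();
                if (!fs->vqs[i].fud) {
                        while (i--)
                                fuse_dev_free(fs->vqs[i].fud);
                        return -ENOMEM;
                }
        }
        return 0;
}

/* Installation phase: cannot fail, so it is safe to run after
 * fuse_fill_super_common() has committed the mount. */
static void virtio_fs_install_devs_sketch(struct virtio_fs_sketch *fs,
                                          struct fuse_conn *fc)
{
        unsigned int i;

        for (i = 0; i < fs->nvqs; i++)
                fuse_dev_install(fs->vqs[i].fud, fc);
}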
- -Signed-off-by: Vivek Goyal - -fuse: process request queues - -Send normal requests to the device and handle completions. - -This is enough to get mount and basic I/O working. The hiprio and -notification queues still need to be implemented for full FUSE -functionality. - -Signed-off-by: Vivek Goyal -Signed-off-by: Stefan Hajnoczi - -fuse: export fuse_get_unique() - -virtio-fs will need unique IDs for FORGET requests from outside -fs/fuse/dev.c. Make the symbol visible. - -Signed-off-by: Stefan Hajnoczi - -fuse: implement FUSE_FORGET for virtio-fs - -Send single FUSE_FORGET requests on the hiprio queue. In the future it -may be possible to do FUSE_BATCH_FORGET, but that is tricky since -virtio-fs gets called synchronously when forgets are queued. - -Signed-off-by: Stefan Hajnoczi - -virtio_fs: Set up dax_device - -Set up a dax device. - -Signed-off-by: Stefan Hajnoczi - -dax: remove block device dependencies - -Although struct dax_device itself is not tied to a block device, some -DAX code assumes there is a block device. Make block devices optional -by allowing bdev to be NULL in commonly used DAX APIs. - -When there is no block device: - * Skip the partition offset calculation in bdev_dax_pgoff() - * Skip the blkdev_issue_zeroout() optimization - -Note that more block device assumptions remain, but I haven't reached those -code paths yet. - -Signed-off-by: Stefan Hajnoczi - -dax: Pass dax_dev to dax_writeback_mapping_range() - -Right now dax_writeback_mapping_range() is passed a bdev, and dax_dev -is looked up from that bdev's name. - -virtio-fs does not have a bdev. So pass dax_dev to -dax_writeback_mapping_range() as well. If dax_dev is passed in, bdev is not -used; otherwise dax_dev is looked up using bdev. - -Signed-off-by: Vivek Goyal - -fuse: add fuse_conn->dax_dev field - -A struct dax_device instance is a prerequisite for the DAX filesystem -APIs. Let virtio_fs associate a dax_device with a fuse_conn. Classic -FUSE and CUSE set the pointer to NULL, disabling DAX. - -Signed-off-by: Stefan Hajnoczi - -virtio: Add get_shm_region method - -Virtio defines 'shared memory regions' that provide a continuously -shared region between the host and guest. - -Provide a method to find a particular region on a device. - -Signed-off-by: Sebastien Boeuf -Signed-off-by: Dr. David Alan Gilbert - -virtio: Implement get_shm_region for PCI transport - -On PCI the shm regions are found using capability entries; -find a region by searching for the capability. - -Signed-off-by: Sebastien Boeuf -Signed-off-by: Dr. David Alan Gilbert - -virtio: Implement get_shm_region for MMIO transport - -On MMIO a new set of registers is defined for finding SHM -regions. Add their definitions and use them to find the region. - -Signed-off-by: Sebastien Boeuf - -fuse: map virtio_fs DAX window - -Use the shm capability to find the cache entry and map it. - -The DAX window is accessed by the fs/dax.c infrastructure and must have -struct pages (at least on x86). Use devm_memremap_pages() to map the -DAX window PCI BAR and allocate struct page. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Sebastien Boeuf -Signed-off-by: Dr. David Alan Gilbert - -virtio-fs: Make dax optional - -Add a 'dax' option and only enable dax when it's on. - -Also show "dax" in the mount options if the filesystem was mounted with dax -enabled. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Vivek Goyal - -Limit number of pages returned by direct_access() - -Truncate the number of pages mapped by direct_access() to remain within the -window size.
User might request mapping pages beyond the window size. - -Signed-off-by: Vivek Goyal - -fuse: Introduce fuse_dax_mapping - -Introduce fuse_dax_mapping. This type will be used to keep track of -per-inode dax mappings. - -Signed-off-by: Vivek Goyal - -Create a list of free memory ranges - -Divide the dax memory range into fixed-size ranges (2MB for now) and put -them in a list. This will track free ranges. Once an inode requires a -free range, we will take one from here and put it in the interval tree -of ranges assigned to the inode. - -Signed-off-by: Vivek Goyal - -fuse: simplify fuse_fill_super_common() calling - -Add more fields to "struct fuse_mount_data" so that fewer parameters -have to be passed to the function fuse_fill_super_common(). - -Signed-off-by: Miklos Szeredi - -fuse: Introduce setupmapping/removemapping commands - -Introduce two new fuse commands to set up/remove memory mappings. - -Signed-off-by: Vivek Goyal - -Introduce interval tree basic data structures - -We want to use an interval tree to keep track of per-inode dax mappings. -Introduce the basic data structures. - -Signed-off-by: Vivek Goyal - -fuse: Implement basic DAX read/write support commands - -This patch implements basic DAX support. mmap() is not implemented -yet and will come in later patches. This patch looks into implementing -read/write. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Vivek Goyal - -fuse: Maintain a list of busy elements - -This list will be used for selecting a fuse_dax_mapping to free when the -number of free mappings drops below a threshold. - -Signed-off-by: Vivek Goyal - -Do fallocate() to grow file before mapping for file growing writes - -How should file-growing writes be handled? For now, this patch does -fallocate() to grow the file and then maps it using dax. We need to figure -out the best way to handle it. - -This patch does the fallocate() and setup-mapping operations in -fuse_dax_write_iter(), instead of iomap_begin(). I don't have access to the -file pointer needed to send a message to the fuse daemon in iomap_begin(). - -Dave Chinner has expressed concerns with this approach as it is not -atomic. If the guest crashes after the fallocate() but before the data was -written, the user will think that the filesystem lost their data. So this is -still an outstanding issue. - -Signed-off-by: Vivek Goyal - -fuse: add DAX mmap support - -Add DAX mmap() support. - -Signed-off-by: Stefan Hajnoczi - -fuse: delete dentry if timeout is zero - -Don't hold onto a dentry in the lru list if we need to re-lookup it anyway -at the next access. - -A more advanced version of this patch would periodically flush out dentries -from the lru which have gone stale. - -Signed-off-by: Miklos Szeredi - -fuse: Define dax address space operations - -This is done along the lines of ext4 and xfs. I primarily wanted the -->writepages hook at this time so that I could call into -dax_writeback_mapping_range(). This in turn will decide which pfns need to -be written back and call dax_flush() on those. - -Signed-off-by: Vivek Goyal - -fuse, dax: Take ->i_mmap_sem lock during dax page fault - -We need some kind of locking mechanism here. Normal file systems like -ext4 and xfs seem to take their own semaphore to protect against -truncate while a fault is going on. - -We have an additional requirement to protect against fuse dax memory range -reclaim. When a range has been selected for reclaim, we need to make sure -no other read/write/fault can try to access that memory range while -reclaim is in progress.
Once reclaim is complete, the lock will be released -and read/write/fault will trigger allocation of a fresh dax range. - -Taking inode_lock() is not an option in the fault path as lockdep complains -about circular dependencies. So define a new fuse_inode->i_mmap_sem. - -Signed-off-by: Vivek Goyal - -fuse: Add logic to free up a memory range - -Add logic to free up a busy memory range. The freed memory range will be -returned to the free pool. Add a worker which can be started to select -and free some busy memory ranges. - -Signed-off-by: Vivek Goyal - -fuse: Add logic to do direct reclaim of memory - -This can be done only from the same inode. Also it can be done only for -the read/write case and not for the fault case. The reason: as of now, -reclaim requires holding the inode_lock, fuse_inode->i_mmap_sem and -fuse_inode->dmap_tree locks in that order, and only the read/write path -allows that (not the fault path). - -Signed-off-by: Vivek Goyal - -fuse: Kick worker when free memory drops below 20% of total ranges - -Kick the worker to free up some memory when the number of free ranges drops -below 20% of the total free ranges at the time of initialization. - -Signed-off-by: Vivek Goyal - -fuse: multiplex cached/direct_io/dax file operations - -Dispatch FORGET requests later instead of dropping them - -If the virtio queue is full, don't drop FORGET requests. Instead, wait -a bit and try to dispatch them a little later using a worker thread. - -Signed-off-by: Vivek Goyal - -Release file in process context - -fuse_file_put(sync) can be called with sync=true/false. If sync=true, -it waits for the release request response and then calls iput() in the -caller's context. If sync=false, it does not wait for the release request -response, frees the fuse_file struct immediately, and the req->end function -does the iput(). - -iput() can be a problem with DAX if called in req->end context. If this -is the last reference to the inode (VFS has let go of its reference -already), then iput() will clean up the DAX mappings as well, send -REMOVEMAPPING requests, and wait for completion. (All in the worker thread -context which is processing fuse replies from the daemon on the host.) - -That means it blocks the worker thread, which stops processing further -replies, and the system deadlocks. - -So for now, force sync release of the file in case of DAX inodes. - -Signed-off-by: Vivek Goyal - -fuse: Do not block on inode lock while freeing memory range - -Once we select a memory range to free, we currently block on the inode -lock. Do not block; use trylock instead, and move on to the next memory -range if the trylock fails. - -The reason is that in the next few patches I want to enable waiting for -memory ranges to become free in fuse_iomap_begin(). So instead of -returning -EBUSY, a process will wait for a memory range to become -free. - -We don't want to end up in a situation where a process is sleeping in -iomap_begin() with the inode lock held and the worker is trying to free -memory from the same inode, resulting in deadlock. - -To avoid deadlock, use trylock instead. - -Signed-off-by: Vivek Goyal - -fuse: Reschedule dax free work if too many EAGAIN attempts - -fuse_dax_free_memory() can be very CPU intensive in corner cases. For -example, if one inode has consumed all the memory and a setupmapping request -is pending, that means the inode lock is held by the request and the worker -thread will not get the lock for a while. And given there is only one inode -consuming all the dax ranges, all the attempts to acquire the lock will -fail. - -So if there are too many inode lock failures (-EAGAIN), reschedule the -worker with a 10ms delay.
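A rough sketch of the back-off just described. queue_delayed_work(), to_delayed_work() and msecs_to_jiffies() are real kernel APIs, and fc->dax_free_work, fc->nr_free_ranges and fc->nr_ranges appear in this patch; the retry threshold and the try_to_free_one_range() helper are assumptions for illustration only.

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

#define FUSE_DAX_RECLAIM_MAX_EAGAIN 10  /* assumed retry threshold */

/* Hypothetical stand-in for the real per-range reclaim logic;
 * returns -EAGAIN when the inode trylock fails. */
static int try_to_free_one_range(struct fuse_conn *fc);

static void fuse_dax_free_memory_sketch(struct work_struct *work)
{
        struct fuse_conn *fc = container_of(to_delayed_work(work),
                                            struct fuse_conn, dax_free_work);
        int nr_eagain = 0;

        /* Keep freeing until we are back above 20% free ranges */
        while (fc->nr_free_ranges < fc->nr_ranges / 5) {
                if (try_to_free_one_range(fc) == -EAGAIN &&
                    ++nr_eagain >= FUSE_DAX_RECLAIM_MAX_EAGAIN) {
                        /* Too many trylock failures: stop spinning and
                         * come back in 10ms instead of burning CPU on a
                         * lock we cannot get. */
                        queue_delayed_work(system_long_wq,
                                           &fc->dax_free_work,
                                           msecs_to_jiffies(10));
                        return;
                }
        }
}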
- -Signed-off-by: Vivek Goyal - -fuse: Wait for memory ranges to become free - -Sometimes we run out of memory ranges. In that case, wait for memory -ranges to become free instead of returning -EBUSY. - -The dax fault path holds fuse_inode->i_mmap_sem, and once that is -held, memory reclaim can't be done. It's not safe to wait while holding -fuse_inode->i_mmap_sem, for two reasons: - -- The worker thread that frees memory might block on fuse_inode->i_mmap_sem as well. -- This inode is holding all the memory, and more memory can't be freed. - -In both cases, deadlock will ensue. So return -ENOSPC from iomap_begin() -in the fault path if memory can't be allocated. Drop fuse_inode->i_mmap_sem, -wait for a free range to become available, and retry. - -The read/write path is a different story. We hold the inode lock, and lock -ordering allows grabbing fuse_inode->i_mmap_sem if needed. That means we can -do direct reclaim in that path. But if there is no memory allocated to this -inode, then direct reclaim will not work and we need to wait for a memory -range to become free. So try the following order: - -A. Try to get a free range. -B. If not, try direct reclaim. -C. If not, wait for a memory range to become free. - -Here sleeping with locks held should be fine, because in step B we made -sure this inode is not holding any ranges. That means other inodes are -holding ranges and somebody should be able to free memory. Also, the worker -thread does a trylock() on the inode lock. That means the worker thread will -not wait on this inode and will move on to the next memory range. Hence the -above sequence should be deadlock free. - -Signed-off-by: Vivek Goyal - -fuse: Take inode lock for dax inode truncation - -When a file is opened with O_TRUNC, we need to make sure that no other -DAX operation is in progress. DAX expects i_size to be stable. - -In fuse_iomap_begin() we check i_size at multiple places and we expect -i_size to not change. - -Another problem is that if we set up a mapping in fuse_iomap_begin(), and -the file gets truncated and a dax read/write happens, KVM currently hangs. -It tries to fault in a page which does not exist on the host (the file got -truncated). It probably requires fixing in KVM. - -So for now, take the inode lock. Once KVM is fixed, we might have to -have a look at it again. - -Signed-off-by: Vivek Goyal - -fuse: Clear setuid bit even in direct I/O path - -With cache=never, we fall back to direct IO. pjdfstest chmod test 12.t was -failing because if a file has the setuid bit, it should be cleared if an -unprivileged user opens it for write and writes to it. - -Call fuse_remove_privs() even for the direct I/O path. - -Signed-off-by: Vivek Goyal - -virtio: Free fuse devices on umount - -When unmounting the fs, close all the fuse devices. -This includes making sure the daemon gets a FUSE_DESTROY to -tell it. - -Signed-off-by: Dr. David Alan Gilbert - -virtio-fs: Fix a race in range reclaim - -We have the notion of doing inline dax range reclaim, where the caller does -not have to drop the inode lock to reclaim one of its dax ranges. It assumed -there is no other reader/writer using that inode (hence not using the dax -range being reclaimed). - -But the fuse read path takes a shared inode lock. That means there could be -other readers while we need to do reclaim. If we try to reclaim now, it is -possible we end up reclaiming a range used by another process. - -To remove that race, do not try to do inline reclaim for the read path. -Instead return -ENOSPC, and the fuse read path will try again when a free -range is available. - -Reported-by: Dr.
David Alan Gilbert -Signed-off-by: Vivek Goyal ---- - drivers/dax/super.c | 3 +- - drivers/virtio/virtio_mmio.c | 32 + - drivers/virtio/virtio_pci_modern.c | 108 +++ - fs/dax.c | 23 +- - fs/ext2/inode.c | 2 +- - fs/ext4/inode.c | 2 +- - fs/fuse/Kconfig | 11 + - fs/fuse/Makefile | 1 + - fs/fuse/cuse.c | 5 +- - fs/fuse/dev.c | 80 +- - fs/fuse/dir.c | 28 +- - fs/fuse/file.c | 1001 +++++++++++++++++++++++-- - fs/fuse/fuse_i.h | 202 ++++- - fs/fuse/inode.c | 316 +++++--- - fs/fuse/virtio_fs.c | 1121 ++++++++++++++++++++++++++++ - fs/splice.c | 3 +- - fs/xfs/xfs_aops.c | 2 +- - include/linux/dax.h | 6 +- - include/linux/fs.h | 2 + - include/linux/virtio_config.h | 17 + - include/uapi/linux/fuse.h | 34 + - include/uapi/linux/virtio_fs.h | 44 ++ - include/uapi/linux/virtio_ids.h | 1 + - include/uapi/linux/virtio_mmio.h | 11 + - include/uapi/linux/virtio_pci.h | 10 + - 25 files changed, 2883 insertions(+), 182 deletions(-) - create mode 100644 fs/fuse/virtio_fs.c - create mode 100644 include/uapi/linux/virtio_fs.h - -diff --git a/drivers/dax/super.c b/drivers/dax/super.c -index 6e928f37d..74f3bf7ae 100644 ---- a/drivers/dax/super.c -+++ b/drivers/dax/super.c -@@ -52,7 +52,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock); - int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, - pgoff_t *pgoff) - { -- phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512; -+ sector_t start_sect = bdev ? get_start_sect(bdev) : 0; -+ phys_addr_t phys_off = (start_sect + sector) * 512; - - if (pgoff) - *pgoff = PHYS_PFN(phys_off); -diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c -index 4cd9ea5c7..9642fa8db 100644 ---- a/drivers/virtio/virtio_mmio.c -+++ b/drivers/virtio/virtio_mmio.c -@@ -494,6 +494,37 @@ static const char *vm_bus_name(struct virtio_device *vdev) - return vm_dev->pdev->name; - } - -+static bool vm_get_shm_region(struct virtio_device *vdev, -+ struct virtio_shm_region *region, u8 id) -+{ -+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); -+ u64 len, addr; -+ -+ /* Select the region we're interested in */ -+ writel(id, vm_dev->base + VIRTIO_MMIO_SHM_SEL); -+ -+ /* Read the region size */ -+ len = (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_LEN_LOW); -+ len |= (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_LEN_HIGH) << 32; -+ -+ region->len = len; -+ -+ /* Check if region length is -1. If that's the case, the shared memory -+ * region does not exist and there is no need to proceed further. 
-+ */ -+ if (len == ~(u64)0) { -+ return false; -+ } -+ -+ /* Read the region base address */ -+ addr = (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_BASE_LOW); -+ addr |= (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_BASE_HIGH) << 32; -+ -+ region->addr = addr; -+ -+ return true; -+} -+ - static const struct virtio_config_ops virtio_mmio_config_ops = { - .get = vm_get, - .set = vm_set, -@@ -506,6 +537,7 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { - .get_features = vm_get_features, - .finalize_features = vm_finalize_features, - .bus_name = vm_bus_name, -+ .get_shm_region = vm_get_shm_region, - }; - - -diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c -index 07571dacc..51c9e6eca 100644 ---- a/drivers/virtio/virtio_pci_modern.c -+++ b/drivers/virtio/virtio_pci_modern.c -@@ -446,6 +446,112 @@ static void del_vq(struct virtio_pci_vq_info *info) - vring_del_virtqueue(vq); - } - -+static int virtio_pci_find_shm_cap(struct pci_dev *dev, -+ u8 required_id, -+ u8 *bar, u64 *offset, u64 *len) -+{ -+ int pos; -+ -+ for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); -+ pos > 0; -+ pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { -+ u8 type, cap_len, id; -+ u32 tmp32; -+ u64 res_offset, res_length; -+ -+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, -+ cfg_type), -+ &type); -+ if (type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG) -+ continue; -+ -+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, -+ cap_len), -+ &cap_len); -+ if (cap_len != sizeof(struct virtio_pci_shm_cap)) { -+ printk(KERN_ERR "%s: shm cap with bad size offset: %d size: %d\n", -+ __func__, pos, cap_len); -+ continue; -+ }; -+ -+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_shm_cap, -+ id), -+ &id); -+ if (id != required_id) -+ continue; -+ -+ /* Type, and ID match, looks good */ -+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, -+ bar), -+ bar); -+ -+ /* Read the lower 32bit of length and offset */ -+ pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap, offset), -+ &tmp32); -+ res_offset = tmp32; -+ pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap, length), -+ &tmp32); -+ res_length = tmp32; -+ -+ /* and now the top half */ -+ pci_read_config_dword(dev, -+ pos + offsetof(struct virtio_pci_shm_cap, -+ offset_hi), -+ &tmp32); -+ res_offset |= ((u64)tmp32) << 32; -+ pci_read_config_dword(dev, -+ pos + offsetof(struct virtio_pci_shm_cap, -+ length_hi), -+ &tmp32); -+ res_length |= ((u64)tmp32) << 32; -+ -+ *offset = res_offset; -+ *len = res_length; -+ -+ return pos; -+ } -+ return 0; -+} -+ -+static bool vp_get_shm_region(struct virtio_device *vdev, -+ struct virtio_shm_region *region, u8 id) -+{ -+ struct virtio_pci_device *vp_dev = to_vp_device(vdev); -+ struct pci_dev *pci_dev = vp_dev->pci_dev; -+ u8 bar; -+ u64 offset, len; -+ phys_addr_t phys_addr; -+ size_t bar_len; -+ char *bar_name; -+ int ret; -+ -+ if (!virtio_pci_find_shm_cap(pci_dev, id, &bar, &offset, &len)) { -+ return false; -+ } -+ -+ ret = pci_request_region(pci_dev, bar, "virtio-pci-shm"); -+ if (ret < 0) { -+ dev_err(&pci_dev->dev, "%s: failed to request BAR\n", -+ __func__); -+ return false; -+ } -+ -+ phys_addr = pci_resource_start(pci_dev, bar); -+ bar_len = pci_resource_len(pci_dev, bar); -+ -+ if (offset + len > bar_len) { -+ dev_err(&pci_dev->dev, -+ "%s: bar shorter than cap offset+len\n", -+ __func__); -+ return false; -+ } -+ -+ region->len = len; -+ region->addr = (u64) phys_addr + offset; -+ -+ return true; 
-+} -+ - static const struct virtio_config_ops virtio_pci_config_nodev_ops = { - .get = NULL, - .set = NULL, -@@ -460,6 +566,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { - .bus_name = vp_bus_name, - .set_vq_affinity = vp_set_vq_affinity, - .get_vq_affinity = vp_get_vq_affinity, -+ .get_shm_region = vp_get_shm_region, - }; - - static const struct virtio_config_ops virtio_pci_config_ops = { -@@ -476,6 +583,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { - .bus_name = vp_bus_name, - .set_vq_affinity = vp_set_vq_affinity, - .get_vq_affinity = vp_get_vq_affinity, -+ .get_shm_region = vp_get_shm_region, - }; - - /** -diff --git a/fs/dax.c b/fs/dax.c -index 75a289c31..8c55d4bdf 100644 ---- a/fs/dax.c -+++ b/fs/dax.c -@@ -1021,12 +1021,12 @@ static int dax_writeback_one(struct dax_device *dax_dev, - * on persistent storage prior to completion of the operation. - */ - int dax_writeback_mapping_range(struct address_space *mapping, -- struct block_device *bdev, struct writeback_control *wbc) -+ struct block_device *bdev, struct dax_device *dax_dev, -+ struct writeback_control *wbc) - { - struct inode *inode = mapping->host; - pgoff_t start_index, end_index; - pgoff_t indices[PAGEVEC_SIZE]; -- struct dax_device *dax_dev; - struct pagevec pvec; - bool done = false; - int i, ret = 0; -@@ -1037,9 +1037,12 @@ int dax_writeback_mapping_range(struct address_space *mapping, - if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) - return 0; - -- dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); -- if (!dax_dev) -- return -EIO; -+ if (bdev) { -+ WARN_ON(dax_dev); -+ dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); -+ if (!dax_dev) -+ return -EIO; -+ } - - start_index = wbc->range_start >> PAGE_SHIFT; - end_index = wbc->range_end >> PAGE_SHIFT; -@@ -1073,7 +1076,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, - start_index = indices[pvec.nr - 1] + 1; - } - out: -- put_dax(dax_dev); -+ if (bdev) -+ put_dax(dax_dev); - trace_dax_writeback_range_done(inode, start_index, end_index); - return (ret < 0 ? 
ret : 0); - } -@@ -1141,7 +1145,12 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry, - static bool dax_range_is_aligned(struct block_device *bdev, - unsigned int offset, unsigned int length) - { -- unsigned short sector_size = bdev_logical_block_size(bdev); -+ unsigned short sector_size; -+ -+ if (!bdev) -+ return false; -+ -+ sector_size = bdev_logical_block_size(bdev); - - if (!IS_ALIGNED(offset, sector_size)) - return false; -diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c -index e4bb9386c..c9b024daf 100644 ---- a/fs/ext2/inode.c -+++ b/fs/ext2/inode.c -@@ -956,7 +956,7 @@ static int - ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) - { - return dax_writeback_mapping_range(mapping, -- mapping->host->i_sb->s_bdev, wbc); -+ mapping->host->i_sb->s_bdev, NULL, wbc); - } - - const struct address_space_operations ext2_aops = { -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 05dc5a4ba..221824742 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -2949,7 +2949,7 @@ static int ext4_dax_writepages(struct address_space *mapping, - percpu_down_read(&sbi->s_journal_flag_rwsem); - trace_ext4_writepages(inode, wbc); - -- ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc); -+ ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, NULL, wbc); - trace_ext4_writepages_result(inode, wbc, ret, - nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); -diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig -index 76f09ce7e..46e9a8ff9 100644 ---- a/fs/fuse/Kconfig -+++ b/fs/fuse/Kconfig -@@ -26,3 +26,14 @@ config CUSE - - If you want to develop or use a userspace character device - based on CUSE, answer Y or M. -+ -+config VIRTIO_FS -+ tristate "Virtio Filesystem" -+ depends on FUSE_FS -+ select VIRTIO -+ help -+ The Virtio Filesystem allows guests to mount file systems from the -+ host. -+ -+ If you want to share files between guests or with the host, answer Y -+ or M. -diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile -index 60da84a86..d125ff826 100644 ---- a/fs/fuse/Makefile -+++ b/fs/fuse/Makefile -@@ -4,5 +4,6 @@ - - obj-$(CONFIG_FUSE_FS) += fuse.o - obj-$(CONFIG_CUSE) += cuse.o -+obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o - - fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o -diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c -index 8f6818125..d49d64f42 100644 ---- a/fs/fuse/cuse.c -+++ b/fs/fuse/cuse.c -@@ -503,9 +503,10 @@ static int cuse_channel_open(struct inode *inode, struct file *file) - * Limit the cuse channel to requests that can - * be represented in file->f_cred->user_ns. 
- */ -- fuse_conn_init(&cc->fc, file->f_cred->user_ns); -+ fuse_conn_init(&cc->fc, file->f_cred->user_ns, NULL, &fuse_dev_fiq_ops, -+ NULL); - -- fud = fuse_dev_alloc(&cc->fc); -+ fud = fuse_dev_alloc_install(&cc->fc); - if (!fud) { - kfree(cc); - return -ENOMEM; -diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c -index 6ee471b72..601da8d58 100644 ---- a/fs/fuse/dev.c -+++ b/fs/fuse/dev.c -@@ -103,6 +103,7 @@ void fuse_request_free(struct fuse_req *req) - } - kmem_cache_free(fuse_req_cachep, req); - } -+EXPORT_SYMBOL_GPL(fuse_request_free); - - void __fuse_get_request(struct fuse_req *req) - { -@@ -310,7 +311,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) - } - EXPORT_SYMBOL_GPL(fuse_put_request); - --static unsigned len_args(unsigned numargs, struct fuse_arg *args) -+unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args) - { - unsigned nbytes = 0; - unsigned i; -@@ -320,19 +321,41 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) - - return nbytes; - } -+EXPORT_SYMBOL_GPL(fuse_len_args); - --static u64 fuse_get_unique(struct fuse_iqueue *fiq) -+u64 fuse_get_unique(struct fuse_iqueue *fiq) - { - return ++fiq->reqctr; - } -+EXPORT_SYMBOL_GPL(fuse_get_unique); - --static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) -+/** -+ * A new request is available, wake fiq->waitq -+ */ -+static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) -+__releases(fiq->waitq.lock) - { -- req->in.h.len = sizeof(struct fuse_in_header) + -- len_args(req->in.numargs, (struct fuse_arg *) req->in.args); -- list_add_tail(&req->list, &fiq->pending); - wake_up_locked(&fiq->waitq); - kill_fasync(&fiq->fasync, SIGIO, POLL_IN); -+ spin_unlock(&fiq->waitq.lock); -+} -+ -+const struct fuse_iqueue_ops fuse_dev_fiq_ops = { -+ .wake_forget_and_unlock = fuse_dev_wake_and_unlock, -+ .wake_interrupt_and_unlock = fuse_dev_wake_and_unlock, -+ .wake_pending_and_unlock = fuse_dev_wake_and_unlock, -+}; -+EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); -+ -+static void queue_request_and_unlock(struct fuse_iqueue *fiq, -+ struct fuse_req *req) -+__releases(fiq->waitq.lock) -+{ -+ req->in.h.len = sizeof(struct fuse_in_header) + -+ fuse_len_args(req->in.numargs, -+ (struct fuse_arg *) req->in.args); -+ list_add_tail(&req->list, &fiq->pending); -+ fiq->ops->wake_pending_and_unlock(fiq); - } - - void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, -@@ -347,12 +370,11 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, - if (fiq->connected) { - fiq->forget_list_tail->next = forget; - fiq->forget_list_tail = forget; -- wake_up_locked(&fiq->waitq); -- kill_fasync(&fiq->fasync, SIGIO, POLL_IN); -+ fiq->ops->wake_forget_and_unlock(fiq); - } else { - kfree(forget); -+ spin_unlock(&fiq->waitq.lock); - } -- spin_unlock(&fiq->waitq.lock); - } - - static void flush_bg_queue(struct fuse_conn *fc) -@@ -367,8 +389,7 @@ static void flush_bg_queue(struct fuse_conn *fc) - fc->active_background++; - spin_lock(&fiq->waitq.lock); - req->in.h.unique = fuse_get_unique(fiq); -- queue_request(fiq, req); -- spin_unlock(&fiq->waitq.lock); -+ queue_request_and_unlock(fiq, req); - } - } - -@@ -380,7 +401,7 @@ static void flush_bg_queue(struct fuse_conn *fc) - * the 'end' callback is called if given, else the reference to the - * request is released - */ --static void request_end(struct fuse_conn *fc, struct fuse_req *req) -+void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) - { - struct fuse_iqueue *fiq = &fc->iq; - -@@ -424,6 +445,7 @@ 
static void request_end(struct fuse_conn *fc, struct fuse_req *req) - put_request: - fuse_put_request(fc, req); - } -+EXPORT_SYMBOL_GPL(fuse_request_end); - - static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) - { -@@ -434,10 +456,10 @@ static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) - } - if (list_empty(&req->intr_entry)) { - list_add_tail(&req->intr_entry, &fiq->interrupts); -- wake_up_locked(&fiq->waitq); -+ fiq->ops->wake_interrupt_and_unlock(fiq); -+ } else { -+ spin_unlock(&fiq->waitq.lock); - } -- spin_unlock(&fiq->waitq.lock); -- kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - } - - static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -@@ -496,14 +518,13 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) - req->out.h.error = -ENOTCONN; - } else { - req->in.h.unique = fuse_get_unique(fiq); -- queue_request(fiq, req); - /* acquire extra reference, since request is still needed -- after request_end() */ -+ after fuse_request_end() */ - __fuse_get_request(req); -- spin_unlock(&fiq->waitq.lock); -+ queue_request_and_unlock(fiq, req); - - request_wait_answer(fc, req); -- /* Pairs with smp_wmb() in request_end() */ -+ /* Pairs with smp_wmb() in fuse_request_end() */ - smp_rmb(); - } - } -@@ -635,10 +656,11 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, - req->in.h.unique = unique; - spin_lock(&fiq->waitq.lock); - if (fiq->connected) { -- queue_request(fiq, req); -+ queue_request_and_unlock(fiq, req); - err = 0; -+ } else { -+ spin_unlock(&fiq->waitq.lock); - } -- spin_unlock(&fiq->waitq.lock); - - return err; - } -@@ -1236,7 +1258,7 @@ __releases(fiq->waitq.lock) - * the pending list and copies request data to userspace buffer. If - * no reply is needed (FORGET) or request has been aborted or there - * was an error during the copying then it's finished by calling -- * request_end(). Otherwise add it to the processing list, and set -+ * fuse_request_end(). Otherwise add it to the processing list, and set - * the 'sent' flag. - */ - static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, -@@ -1295,7 +1317,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, - /* SETXATTR is special, since it may contain too large data */ - if (in->h.opcode == FUSE_SETXATTR) - req->out.h.error = -E2BIG; -- request_end(fc, req); -+ fuse_request_end(fc, req); - goto restart; - } - spin_lock(&fpq->lock); -@@ -1337,7 +1359,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, - if (!test_bit(FR_PRIVATE, &req->flags)) - list_del_init(&req->list); - spin_unlock(&fpq->lock); -- request_end(fc, req); -+ fuse_request_end(fc, req); - return err; - - err_unlock: -@@ -1824,7 +1846,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, - if (out->h.error) - return nbytes != reqsize ? -EINVAL : 0; - -- reqsize += len_args(out->numargs, out->args); -+ reqsize += fuse_len_args(out->numargs, out->args); - - if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) - return -EINVAL; -@@ -1844,7 +1866,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, - * the write buffer. The request is then searched on the processing - * list by the unique ID found in the header. If found, then remove - * it from the list and copy the rest of the buffer to the request. -- * The request is finished by calling request_end() -+ * The request is finished by calling fuse_request_end(). 
- */ - static ssize_t fuse_dev_do_write(struct fuse_dev *fud, - struct fuse_copy_state *cs, size_t nbytes) -@@ -1931,7 +1953,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, - list_del_init(&req->list); - spin_unlock(&fpq->lock); - -- request_end(fc, req); -+ fuse_request_end(fc, req); - - return err ? err : nbytes; - -@@ -2077,7 +2099,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) - req->out.h.error = -ECONNABORTED; - clear_bit(FR_SENT, &req->flags); - list_del_init(&req->list); -- request_end(fc, req); -+ fuse_request_end(fc, req); - } - } - -@@ -2223,7 +2245,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) - if (new->private_data) - return -EINVAL; - -- fud = fuse_dev_alloc(fc); -+ fud = fuse_dev_alloc_install(fc); - if (!fud) - return -ENOMEM; - -diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index 82a132217..3f923fe78 100644 ---- a/fs/fuse/dir.c -+++ b/fs/fuse/dir.c -@@ -44,12 +44,26 @@ union fuse_dentry { - struct rcu_head rcu; - }; - --static inline void fuse_dentry_settime(struct dentry *entry, u64 time) -+static void fuse_dentry_settime(struct dentry *dentry, u64 time) - { -- ((union fuse_dentry *) entry->d_fsdata)->time = time; -+ /* -+ * Mess with DCACHE_OP_DELETE because dput() will be faster without it. -+ * Don't care about races, either way it's just an optimization -+ */ -+ if ((time && (dentry->d_flags & DCACHE_OP_DELETE)) || -+ (!time && !(dentry->d_flags & DCACHE_OP_DELETE))) { -+ spin_lock(&dentry->d_lock); -+ if (time) -+ dentry->d_flags &= ~DCACHE_OP_DELETE; -+ else -+ dentry->d_flags |= DCACHE_OP_DELETE; -+ spin_unlock(&dentry->d_lock); -+ } -+ -+ ((union fuse_dentry *) dentry->d_fsdata)->time = time; - } - --static inline u64 fuse_dentry_time(struct dentry *entry) -+static inline u64 fuse_dentry_time(const struct dentry *entry) - { - return ((union fuse_dentry *) entry->d_fsdata)->time; - } -@@ -280,8 +294,14 @@ static void fuse_dentry_release(struct dentry *dentry) - kfree_rcu(fd, rcu); - } - -+static int fuse_dentry_delete(const struct dentry *dentry) -+{ -+ return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); -+} -+ - const struct dentry_operations fuse_dentry_operations = { - .d_revalidate = fuse_dentry_revalidate, -+ .d_delete = fuse_dentry_delete, - .d_init = fuse_dentry_init, - .d_release = fuse_dentry_release, - }; -@@ -1728,8 +1748,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, - */ - if ((is_truncate || !is_wb) && - S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { -+ down_write(&fi->i_mmap_sem); - truncate_pagecache(inode, outarg.attr.size); - invalidate_inode_pages2(inode->i_mapping); -+ up_write(&fi->i_mmap_sem); - } - - clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); -diff --git a/fs/fuse/file.c b/fs/fuse/file.c -index 9a22aa580..7333b449e 100644 ---- a/fs/fuse/file.c -+++ b/fs/fuse/file.c -@@ -18,8 +18,18 @@ - #include - #include - #include -+#include -+#include -+#include - --static const struct file_operations fuse_direct_io_file_operations; -+INTERVAL_TREE_DEFINE(struct fuse_dax_mapping, -+ rb, __u64, __subtree_last, -+ START, LAST, static inline, fuse_dax_interval_tree); -+ -+static long __fuse_file_fallocate(struct file *file, int mode, -+ loff_t offset, loff_t length); -+static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc, -+ struct inode *inode); - - static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, - int opcode, struct fuse_open_out *outargp) -@@ -170,13 +180,222 @@ static void 
fuse_link_write_file(struct file *file) - spin_unlock(&fc->lock); - } - -+static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn *fc) -+{ -+ unsigned long free_threshold; -+ struct fuse_dax_mapping *dmap = NULL; -+ -+ spin_lock(&fc->lock); -+ -+ /* TODO: Add logic to try to free up memory if wait is allowed */ -+ if (fc->nr_free_ranges <= 0) { -+ spin_unlock(&fc->lock); -+ goto out_kick; -+ } -+ -+ WARN_ON(list_empty(&fc->free_ranges)); -+ -+ /* Take a free range */ -+ dmap = list_first_entry(&fc->free_ranges, struct fuse_dax_mapping, -+ list); -+ list_del_init(&dmap->list); -+ fc->nr_free_ranges--; -+ spin_unlock(&fc->lock); -+ -+out_kick: -+ /* If number of free ranges are below threshold, start reclaim */ -+ free_threshold = max((fc->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD)/100, -+ (unsigned long)1); -+ if (fc->nr_free_ranges < free_threshold) { -+ pr_debug("fuse: Kicking dax memory reclaim worker. nr_free_ranges=0x%ld nr_total_ranges=%ld\n", fc->nr_free_ranges, fc->nr_ranges); -+ queue_delayed_work(system_long_wq, &fc->dax_free_work, 0); -+ } -+ return dmap; -+} -+ -+/* This assumes fc->lock is held */ -+static void __dmap_remove_busy_list(struct fuse_conn *fc, -+ struct fuse_dax_mapping *dmap) -+{ -+ list_del_init(&dmap->busy_list); -+ WARN_ON(fc->nr_busy_ranges == 0); -+ fc->nr_busy_ranges--; -+} -+ -+static void dmap_remove_busy_list(struct fuse_conn *fc, -+ struct fuse_dax_mapping *dmap) -+{ -+ spin_lock(&fc->lock); -+ __dmap_remove_busy_list(fc, dmap); -+ spin_unlock(&fc->lock); -+} -+ -+/* This assumes fc->lock is held */ -+static void __free_dax_mapping(struct fuse_conn *fc, -+ struct fuse_dax_mapping *dmap) -+{ -+ list_add_tail(&dmap->list, &fc->free_ranges); -+ fc->nr_free_ranges++; -+ /* TODO: Wake up only when needed */ -+ wake_up(&fc->dax_range_waitq); -+} -+ -+static void free_dax_mapping(struct fuse_conn *fc, -+ struct fuse_dax_mapping *dmap) -+{ -+ /* Return fuse_dax_mapping to free list */ -+ spin_lock(&fc->lock); -+ __free_dax_mapping(fc, dmap); -+ spin_unlock(&fc->lock); -+} -+ -+/* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */ -+static int fuse_setup_one_mapping(struct inode *inode, -+ struct file *file, loff_t offset, -+ struct fuse_dax_mapping *dmap) -+{ -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ struct fuse_file *ff = NULL; -+ struct fuse_setupmapping_in inarg; -+ FUSE_ARGS(args); -+ ssize_t err; -+ -+ if (file) -+ ff = file->private_data; -+ -+ WARN_ON(offset % FUSE_DAX_MEM_RANGE_SZ); -+ WARN_ON(fc->nr_free_ranges < 0); -+ -+ /* Ask fuse daemon to setup mapping */ -+ memset(&inarg, 0, sizeof(inarg)); -+ inarg.foffset = offset; -+ if (ff) -+ inarg.fh = ff->fh; -+ else -+ inarg.fh = -1; -+ inarg.moffset = dmap->window_offset; -+ inarg.len = FUSE_DAX_MEM_RANGE_SZ; -+ if (file) { -+ inarg.flags |= (file->f_mode & FMODE_WRITE) ? -+ FUSE_SETUPMAPPING_FLAG_WRITE : 0; -+ inarg.flags |= (file->f_mode & FMODE_READ) ? -+ FUSE_SETUPMAPPING_FLAG_READ : 0; -+ } else { -+ inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; -+ inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; -+ } -+ args.in.h.opcode = FUSE_SETUPMAPPING; -+ args.in.h.nodeid = fi->nodeid; -+ args.in.numargs = 1; -+ args.in.args[0].size = sizeof(inarg); -+ args.in.args[0].value = &inarg; -+ err = fuse_simple_request(fc, &args); -+ if (err < 0) { -+ printk(KERN_ERR "%s request failed at mem_offset=0x%llx %zd\n", -+ __func__, dmap->window_offset, err); -+ return err; -+ } -+ -+ pr_debug("fuse_setup_one_mapping() succeeded. 
offset=0x%llx err=%zd\n", offset, err); -+ -+ /* -+ * We don't take a refernce on inode. inode is valid right now and -+ * when inode is going away, cleanup logic should first cleanup -+ * dmap entries. -+ * -+ * TODO: Do we need to ensure that we are holding inode lock -+ * as well. -+ */ -+ dmap->inode = inode; -+ dmap->start = offset; -+ dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1; -+ /* Protected by fi->i_dmap_sem */ -+ fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree); -+ fi->nr_dmaps++; -+ spin_lock(&fc->lock); -+ list_add_tail(&dmap->busy_list, &fc->busy_ranges); -+ fc->nr_busy_ranges++; -+ spin_unlock(&fc->lock); -+ return 0; -+} -+ -+static int fuse_removemapping_one(struct inode *inode, -+ struct fuse_dax_mapping *dmap) -+{ -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ struct fuse_removemapping_in inarg; -+ FUSE_ARGS(args); -+ -+ memset(&inarg, 0, sizeof(inarg)); -+ inarg.moffset = dmap->window_offset; -+ inarg.len = dmap->length; -+ args.in.h.opcode = FUSE_REMOVEMAPPING; -+ args.in.h.nodeid = fi->nodeid; -+ args.in.numargs = 1; -+ args.in.args[0].size = sizeof(inarg); -+ args.in.args[0].value = &inarg; -+ return fuse_simple_request(fc, &args); -+} -+ -+/* -+ * It is called from evict_inode() and by that time inode is going away. So -+ * this function does not take any locks like fi->i_dmap_sem for traversing -+ * that fuse inode interval tree. If that lock is taken then lock validator -+ * complains of deadlock situation w.r.t fs_reclaim lock. -+ */ -+void fuse_removemapping(struct inode *inode) -+{ -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ ssize_t err; -+ struct fuse_dax_mapping *dmap; -+ -+ /* Clear the mappings list */ -+ while (true) { -+ WARN_ON(fi->nr_dmaps < 0); -+ -+ dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0, -+ -1); -+ if (dmap) { -+ fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree); -+ fi->nr_dmaps--; -+ dmap_remove_busy_list(fc, dmap); -+ } -+ -+ if (!dmap) -+ break; -+ -+ /* -+ * During umount/shutdown, fuse connection is dropped first -+ * and later evict_inode() is called later. That means any -+ * removemapping messages are going to fail. Send messages -+ * only if connection is up. Otherwise fuse daemon is -+ * responsible for cleaning up any leftover references and -+ * mappings. -+ */ -+ if (fc->connected) { -+ err = fuse_removemapping_one(inode, dmap); -+ if (err) { -+ pr_warn("Failed to removemapping. 
offset=0x%llx" -+ " len=0x%llx\n", dmap->window_offset, -+ dmap->length); -+ } -+ } -+ -+ dmap->inode = NULL; -+ -+ /* Add it back to free ranges list */ -+ free_dax_mapping(fc, dmap); -+ } -+} -+ - void fuse_finish_open(struct inode *inode, struct file *file) - { - struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); - -- if (ff->open_flags & FOPEN_DIRECT_IO) -- file->f_op = &fuse_direct_io_file_operations; - if (!(ff->open_flags & FOPEN_KEEP_CACHE)) - invalidate_inode_pages2(inode->i_mapping); - if (ff->open_flags & FOPEN_STREAM) -@@ -204,7 +423,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) - int err; - bool lock_inode = (file->f_flags & O_TRUNC) && - fc->atomic_o_trunc && -- fc->writeback_cache; -+ (fc->writeback_cache || IS_DAX(inode)); - - err = generic_file_open(inode, file); - if (err) -@@ -252,6 +471,7 @@ void fuse_release_common(struct file *file, bool isdir) - struct fuse_file *ff = file->private_data; - struct fuse_req *req = ff->reserved_req; - int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; -+ bool sync = false; - - fuse_prepare_release(ff, file->f_flags, opcode); - -@@ -272,8 +492,20 @@ void fuse_release_common(struct file *file, bool isdir) - * Make the release synchronous if this is a fuseblk mount, - * synchronous RELEASE is allowed (and desirable) in this case - * because the server can be trusted not to screw up. -+ * -+ * For DAX, fuse server is trusted. So it should be fine to -+ * do a sync file put. Doing async file put is creating -+ * problems right now because when request finish, iput() -+ * can lead to freeing of inode. That means it tears down -+ * mappings backing DAX memory and sends REMOVEMAPPING message -+ * to server and blocks for completion. Currently, waiting -+ * in req->end context deadlocks the system as same worker thread -+ * can't process REMOVEMAPPING reply it is waiting for. - */ -- fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir); -+ if (IS_DAX(req->misc.release.inode) || ff->fc->destroy_req != NULL) -+ sync = true; -+ -+ fuse_file_put(ff, sync, isdir); - } - - static int fuse_open(struct inode *inode, struct file *file) -@@ -918,11 +1150,23 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, - return err; - } - -+ -+static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to); -+static ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to); -+ - static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) - { -- struct inode *inode = iocb->ki_filp->f_mapping->host; -+ struct file *file = iocb->ki_filp; -+ struct fuse_file *ff = file->private_data; -+ struct inode *inode = file->f_mapping->host; - struct fuse_conn *fc = get_fuse_conn(inode); - -+ if (ff->open_flags & FOPEN_DIRECT_IO) -+ return fuse_direct_read_iter(iocb, to); -+ -+ if (IS_DAX(inode)) -+ return fuse_dax_read_iter(iocb, to); -+ - /* - * In auto invalidate mode, always update attributes on read. - * Otherwise, only update if we attempt to read past EOF (to ensure -@@ -1170,9 +1414,14 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, - return res > 0 ? 
res : err; - } - -+static ssize_t fuse_direct_write_iter(struct kiocb *iocb, -+ struct iov_iter *from); -+static ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from); -+ - static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) - { - struct file *file = iocb->ki_filp; -+ struct fuse_file *ff = file->private_data; - struct address_space *mapping = file->f_mapping; - ssize_t written = 0; - ssize_t written_buffered = 0; -@@ -1180,6 +1429,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) - ssize_t err; - loff_t endbyte = 0; - -+ if (ff->open_flags & FOPEN_DIRECT_IO) -+ return fuse_direct_write_iter(iocb, from); -+ if (IS_DAX(inode)) -+ return fuse_dax_write_iter(iocb, from); -+ - if (get_fuse_conn(inode)->writeback_cache) { - /* Update size (EOF optimization) and mode (SUID clearing) */ - err = fuse_update_attributes(mapping->host, file); -@@ -1444,16 +1698,279 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) - /* Don't allow parallel writes to the same file */ - inode_lock(inode); - res = generic_write_checks(iocb, from); -- if (res > 0) -- res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); -+ if (res < 0) -+ goto out_invalidate; -+ -+ res = file_remove_privs(iocb->ki_filp); -+ if (res) -+ goto out_invalidate; -+ -+ res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); -+ if (res < 0) -+ goto out_invalidate; -+ - fuse_invalidate_attr(inode); -- if (res > 0) -- fuse_write_update_size(inode, iocb->ki_pos); -+ fuse_write_update_size(inode, iocb->ki_pos); - inode_unlock(inode); -+ return res; - -+out_invalidate: -+ fuse_invalidate_attr(inode); -+ inode_unlock(inode); - return res; - } - -+static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) -+{ -+ iomap->addr = IOMAP_NULL_ADDR; -+ iomap->length = length; -+ iomap->type = IOMAP_HOLE; -+} -+ -+static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, -+ struct iomap *iomap, struct fuse_dax_mapping *dmap, -+ unsigned flags) -+{ -+ loff_t offset, len; -+ loff_t i_size = i_size_read(inode); -+ -+ offset = pos - dmap->start; -+ len = min(length, dmap->length - offset); -+ -+ /* If length is beyond end of file, truncate further */ -+ if (pos + len > i_size) -+ len = i_size - pos; -+ -+ if (len > 0) { -+ iomap->addr = dmap->window_offset + offset; -+ iomap->length = len; -+ if (flags & IOMAP_FAULT) -+ iomap->length = ALIGN(len, PAGE_SIZE); -+ iomap->type = IOMAP_MAPPED; -+ pr_debug("%s: returns iomap: addr 0x%llx offset 0x%llx" -+ " length 0x%llx\n", __func__, iomap->addr, -+ iomap->offset, iomap->length); -+ } else { -+ /* Mapping beyond end of file is hole */ -+ fuse_fill_iomap_hole(iomap, length); -+ pr_debug("%s: returns iomap: addr 0x%llx offset 0x%llx" -+ "length 0x%llx\n", __func__, iomap->addr, -+ iomap->offset, iomap->length); -+ } -+} -+ -+/* This is just for DAX and the mapping is ephemeral, do not use it for other -+ * purposes since there is no block device with a permanent mapping. -+ */ -+static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, -+ unsigned flags, struct iomap *iomap) -+{ -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; -+ int ret; -+ -+ /* We don't support FIEMAP */ -+ BUG_ON(flags & IOMAP_REPORT); -+ -+ pr_debug("fuse_iomap_begin() called. 
pos=0x%llx length=0x%llx\n", -+ pos, length); -+ -+ iomap->offset = pos; -+ iomap->flags = 0; -+ iomap->bdev = NULL; -+ iomap->dax_dev = fc->dax_dev; -+ -+ /* -+ * Both read/write and mmap path can race here. So we need something -+ * to make sure if we are setting up mapping, then other path waits -+ * -+ * For now, use a semaphore for this. It probably needs to be -+ * optimized later. -+ */ -+ down_read(&fi->i_dmap_sem); -+ dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos); -+ -+ if (dmap) { -+ fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); -+ up_read(&fi->i_dmap_sem); -+ return 0; -+ } else { -+ up_read(&fi->i_dmap_sem); -+ pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", -+ __func__, pos, length); -+ if (pos >= i_size_read(inode)) -+ goto iomap_hole; -+ -+ /* Can't do reclaim in fault path yet due to lock ordering. -+ * Read path takes shared inode lock and that's not sufficient -+ * for inline range reclaim. Caller needs to drop lock, wait -+ * and retry. -+ */ -+ if (flags & IOMAP_FAULT || !(flags & IOMAP_WRITE)) { -+ alloc_dmap = alloc_dax_mapping(fc); -+ if (!alloc_dmap) -+ return -ENOSPC; -+ } else { -+ alloc_dmap = alloc_dax_mapping_reclaim(fc, inode); -+ if (IS_ERR(alloc_dmap)) -+ return PTR_ERR(alloc_dmap); -+ } -+ -+ /* If we are here, we should have memory allocated */ -+ if (WARN_ON(!alloc_dmap)) -+ return -EBUSY; -+ -+ /* -+ * Drop read lock and take write lock so that only one -+ * caller can try to setup mapping and other waits -+ */ -+ down_write(&fi->i_dmap_sem); -+ /* -+ * We dropped lock. Check again if somebody else setup -+ * mapping already. -+ */ -+ dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, -+ pos); -+ if (dmap) { -+ fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); -+ free_dax_mapping(fc, alloc_dmap); -+ up_write(&fi->i_dmap_sem); -+ return 0; -+ } -+ -+ /* Setup one mapping */ -+ ret = fuse_setup_one_mapping(inode, NULL, -+ ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), -+ alloc_dmap); -+ if (ret < 0) { -+ printk("fuse_setup_one_mapping() failed. err=%d" -+ " pos=0x%llx\n", ret, pos); -+ free_dax_mapping(fc, alloc_dmap); -+ up_write(&fi->i_dmap_sem); -+ return ret; -+ } -+ fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); -+ up_write(&fi->i_dmap_sem); -+ return 0; -+ } -+ -+ /* -+ * If read beyond end of file happnes, fs code seems to return -+ * it as hole -+ */ -+iomap_hole: -+ fuse_fill_iomap_hole(iomap, length); -+ pr_debug("fuse_iomap_begin() returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", pos, length, iomap->length); -+ return 0; -+} -+ -+static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, -+ ssize_t written, unsigned flags, -+ struct iomap *iomap) -+{ -+ /* DAX writes beyond end-of-file aren't handled using iomap, so the -+ * file size is unchanged and there is nothing to do here. 
-+ */ -+ return 0; -+} -+ -+static const struct iomap_ops fuse_iomap_ops = { -+ .iomap_begin = fuse_iomap_begin, -+ .iomap_end = fuse_iomap_end, -+}; -+ -+static ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) -+{ -+ struct inode *inode = file_inode(iocb->ki_filp); -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ ssize_t ret; -+ bool retry = false; -+ -+retry: -+ if (retry && !(fc->nr_free_ranges > 0)) { -+ ret = -EINTR; -+ if (wait_event_killable_exclusive(fc->dax_range_waitq, -+ (fc->nr_free_ranges > 0))) { -+ goto out; -+ } -+ } -+ -+ if (iocb->ki_flags & IOCB_NOWAIT) { -+ if (!inode_trylock_shared(inode)) -+ return -EAGAIN; -+ } else { -+ inode_lock_shared(inode); -+ } -+ -+ ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); -+ inode_unlock_shared(inode); -+ -+ /* If a dax range could not be allocated and it can't be reclaimed -+ * inline, then drop inode lock and retry. Range reclaim logic -+ * requires exclusive access to inode lock. -+ * -+ * TODO: What if -ENOSPC needs to be returned to user space. Fix it. -+ */ -+ if (ret == -ENOSPC) { -+ retry = true; -+ goto retry; -+ } -+ /* TODO file_accessed(iocb->f_filp) */ -+ -+out: -+ return ret; -+} -+ -+static ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) -+{ -+ struct inode *inode = file_inode(iocb->ki_filp); -+ ssize_t ret; -+ -+ if (iocb->ki_flags & IOCB_NOWAIT) { -+ if (!inode_trylock(inode)) -+ return -EAGAIN; -+ } else { -+ inode_lock(inode); -+ } -+ -+ ret = generic_write_checks(iocb, from); -+ if (ret <= 0) -+ goto out; -+ -+ ret = file_remove_privs(iocb->ki_filp); -+ if (ret) -+ goto out; -+ /* TODO file_update_time() but we don't want metadata I/O */ -+ -+ /* TODO handle growing the file */ -+ /* Grow file here if need be. iomap_begin() does not have access -+ * to file pointer -+ */ -+ if (iov_iter_rw(from) == WRITE && -+ ((iocb->ki_pos + iov_iter_count(from)) > i_size_read(inode))) { -+ ret = __fuse_file_fallocate(iocb->ki_filp, 0, iocb->ki_pos, -+ iov_iter_count(from)); -+ if (ret < 0) { -+ printk("fallocate(offset=0x%llx length=0x%zx)" -+ " failed. err=%zd\n", iocb->ki_pos, -+ iov_iter_count(from), ret); -+ goto out; -+ } -+ pr_debug("fallocate(offset=0x%llx length=0x%zx)" -+ " succeed. 
ret=%zd\n", iocb->ki_pos, iov_iter_count(from), ret); -+ } -+ -+ ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); -+ -+out: -+ inode_unlock(inode); -+ -+ if (ret > 0) -+ ret = generic_write_sync(iocb, ret); -+ return ret; -+} -+ - static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) - { - int i; -@@ -1903,6 +2420,17 @@ static int fuse_writepages_fill(struct page *page, - return err; - } - -+static int fuse_dax_writepages(struct address_space *mapping, -+ struct writeback_control *wbc) -+{ -+ -+ struct inode *inode = mapping->host; -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ -+ return dax_writeback_mapping_range(mapping, -+ NULL, fc->dax_dev, wbc); -+} -+ - static int fuse_writepages(struct address_space *mapping, - struct writeback_control *wbc) - { -@@ -2076,8 +2604,20 @@ static const struct vm_operations_struct fuse_file_vm_ops = { - .page_mkwrite = fuse_page_mkwrite, - }; - -+static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma); -+static int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma); -+ - static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) - { -+ struct fuse_file *ff = file->private_data; -+ -+ /* DAX mmap is superior to direct_io mmap */ -+ if (IS_DAX(file_inode(file))) -+ return fuse_dax_mmap(file, vma); -+ -+ if (ff->open_flags & FOPEN_DIRECT_IO) -+ return fuse_direct_mmap(file, vma); -+ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) - fuse_link_write_file(file); - -@@ -2097,6 +2637,103 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) - return generic_file_mmap(file, vma); - } - -+static ssize_t fuse_file_splice_read(struct file *in, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags) -+{ -+ struct fuse_file *ff = in->private_data; -+ -+ if (ff->open_flags & FOPEN_DIRECT_IO) -+ return default_file_splice_read(in, ppos, pipe, len, flags); -+ else -+ return generic_file_splice_read(in, ppos, pipe, len, flags); -+ -+} -+static int __fuse_dax_fault(struct vm_fault *vmf, enum page_entry_size pe_size, -+ bool write) -+{ -+ int ret, error = 0; -+ struct inode *inode = file_inode(vmf->vma->vm_file); -+ struct super_block *sb = inode->i_sb; -+ pfn_t pfn; -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ bool retry = false; -+ -+ if (write) -+ sb_start_pagefault(sb); -+ -+retry: -+ if (retry && !(fc->nr_free_ranges > 0)) { -+ ret = -EINTR; -+ if (wait_event_killable_exclusive(fc->dax_range_waitq, -+ (fc->nr_free_ranges > 0))) -+ goto out; -+ } -+ -+ /* -+ * We need to serialize against not only truncate but also against -+ * fuse dax memory range reclaim. While a range is being reclaimed, -+ * we do not want any read/write/mmap to make progress and try -+ * to populate page cache or access memory we are trying to free. 
-+ */ -+ down_read(&get_fuse_inode(inode)->i_mmap_sem); -+ ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops); -+ if ((ret & VM_FAULT_ERROR) && error == -ENOSPC) { -+ error = 0; -+ retry = true; -+ up_read(&get_fuse_inode(inode)->i_mmap_sem); -+ goto retry; -+ } -+ -+ if (ret & VM_FAULT_NEEDDSYNC) -+ ret = dax_finish_sync_fault(vmf, pe_size, pfn); -+ -+ up_read(&get_fuse_inode(inode)->i_mmap_sem); -+ -+out: -+ if (write) -+ sb_end_pagefault(sb); -+ -+ return ret; -+} -+ -+static int fuse_dax_fault(struct vm_fault *vmf) -+{ -+ return __fuse_dax_fault(vmf, PE_SIZE_PTE, -+ vmf->flags & FAULT_FLAG_WRITE); -+} -+ -+static int fuse_dax_huge_fault(struct vm_fault *vmf, -+ enum page_entry_size pe_size) -+{ -+ return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE); -+} -+ -+static int fuse_dax_page_mkwrite(struct vm_fault *vmf) -+{ -+ return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); -+} -+ -+static int fuse_dax_pfn_mkwrite(struct vm_fault *vmf) -+{ -+ return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); -+} -+ -+static const struct vm_operations_struct fuse_dax_vm_ops = { -+ .fault = fuse_dax_fault, -+ .huge_fault = fuse_dax_huge_fault, -+ .page_mkwrite = fuse_dax_page_mkwrite, -+ .pfn_mkwrite = fuse_dax_pfn_mkwrite, -+}; -+ -+static int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ file_accessed(file); -+ vma->vm_ops = &fuse_dax_vm_ops; -+ vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; -+ return 0; -+} -+ - static int convert_fuse_file_lock(struct fuse_conn *fc, - const struct fuse_file_lock *ffl, - struct file_lock *fl) -@@ -2940,8 +3577,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) - return ret; - } - --static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, -- loff_t length) -+/* -+ * This variant does not take any inode lock and if locking is required, -+ * caller is supposed to hold lock -+ */ -+static long __fuse_file_fallocate(struct file *file, int mode, -+ loff_t offset, loff_t length) - { - struct fuse_file *ff = file->private_data; - struct inode *inode = file_inode(file); -@@ -2955,8 +3596,6 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, - .mode = mode - }; - int err; -- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || -- (mode & FALLOC_FL_PUNCH_HOLE); - - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return -EOPNOTSUPP; -@@ -2964,17 +3603,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, - if (fc->no_fallocate) - return -EOPNOTSUPP; - -- if (lock_inode) { -- inode_lock(inode); -- if (mode & FALLOC_FL_PUNCH_HOLE) { -- loff_t endbyte = offset + length - 1; -- err = filemap_write_and_wait_range(inode->i_mapping, -- offset, endbyte); -- if (err) -- goto out; -- -- fuse_sync_writes(inode); -- } -+ if (mode & FALLOC_FL_PUNCH_HOLE) { -+ loff_t endbyte = offset + length - 1; -+ err = filemap_write_and_wait_range(inode->i_mapping, offset, -+ endbyte); -+ if (err) -+ goto out; -+ fuse_sync_writes(inode); - } - - if (!(mode & FALLOC_FL_KEEP_SIZE) && -@@ -3008,18 +3643,42 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, - file_update_time(file); - } - -- if (mode & FALLOC_FL_PUNCH_HOLE) -+ if (mode & FALLOC_FL_PUNCH_HOLE) { -+ down_write(&fi->i_mmap_sem); - truncate_pagecache_range(inode, offset, offset + length - 1); -- -+ up_write(&fi->i_mmap_sem); -+ } - fuse_invalidate_attr(inode); - - out: - if (!(mode & FALLOC_FL_KEEP_SIZE)) - clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - -+ return err; -+} -+ 
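-+/*
-+ * Note: __fuse_file_fallocate() above is the lock-free variant for
-+ * callers that already hold the inode lock. fuse_dax_write_iter() uses
-+ * it to grow the file while holding inode_lock(), roughly:
-+ *
-+ *	ret = __fuse_file_fallocate(iocb->ki_filp, 0, iocb->ki_pos,
-+ *				    iov_iter_count(from));
-+ */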
-+static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, -+ loff_t length) -+{ -+ struct fuse_file *ff = file->private_data; -+ struct inode *inode = file_inode(file); -+ struct fuse_conn *fc = ff->fc; -+ int err; -+ bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || -+ (mode & FALLOC_FL_PUNCH_HOLE); -+ -+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) -+ return -EOPNOTSUPP; -+ -+ if (fc->no_fallocate) -+ return -EOPNOTSUPP; -+ - if (lock_inode) -- inode_unlock(inode); -+ inode_lock(inode); - -+ err = __fuse_file_fallocate(file, mode, offset, length); -+ if (lock_inode) -+ inode_unlock(inode); - return err; - } - -@@ -3027,38 +3686,21 @@ static const struct file_operations fuse_file_operations = { - .llseek = fuse_file_llseek, - .read_iter = fuse_file_read_iter, - .write_iter = fuse_file_write_iter, -- .mmap = fuse_file_mmap, -+ .mmap = fuse_file_mmap, -+ .splice_read = fuse_file_splice_read, - .open = fuse_open, - .flush = fuse_flush, - .release = fuse_release, - .fsync = fuse_fsync, - .lock = fuse_file_lock, -+ .get_unmapped_area = thp_get_unmapped_area, - .flock = fuse_file_flock, -- .splice_read = generic_file_splice_read, - .unlocked_ioctl = fuse_file_ioctl, - .compat_ioctl = fuse_file_compat_ioctl, - .poll = fuse_file_poll, - .fallocate = fuse_file_fallocate, - }; - --static const struct file_operations fuse_direct_io_file_operations = { -- .llseek = fuse_file_llseek, -- .read_iter = fuse_direct_read_iter, -- .write_iter = fuse_direct_write_iter, -- .mmap = fuse_direct_mmap, -- .open = fuse_open, -- .flush = fuse_flush, -- .release = fuse_release, -- .fsync = fuse_fsync, -- .lock = fuse_file_lock, -- .flock = fuse_file_flock, -- .unlocked_ioctl = fuse_file_ioctl, -- .compat_ioctl = fuse_file_compat_ioctl, -- .poll = fuse_file_poll, -- .fallocate = fuse_file_fallocate, -- /* no splice_read */ --}; -- - static const struct address_space_operations fuse_file_aops = { - .readpage = fuse_readpage, - .writepage = fuse_writepage, -@@ -3072,8 +3714,271 @@ static const struct address_space_operations fuse_file_aops = { - .write_end = fuse_write_end, - }; - -+static const struct address_space_operations fuse_dax_file_aops = { -+ .writepages = fuse_dax_writepages, -+ .direct_IO = noop_direct_IO, -+ .set_page_dirty = noop_set_page_dirty, -+ .invalidatepage = noop_invalidatepage, -+}; -+ - void fuse_init_file_inode(struct inode *inode) - { -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ - inode->i_fop = &fuse_file_operations; - inode->i_data.a_ops = &fuse_file_aops; -+ fi->dmap_tree = RB_ROOT_CACHED; -+ -+ if (fc->dax_dev) { -+ inode->i_flags |= S_DAX; -+ inode->i_data.a_ops = &fuse_dax_file_aops; -+ } -+} -+ -+int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode, -+ struct fuse_dax_mapping *dmap) -+{ -+ int ret; -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ -+ ret = filemap_fdatawrite_range(inode->i_mapping, dmap->start, -+ dmap->end); -+ if (ret) { -+ printk("filemap_fdatawrite_range() failed. err=%d start=0x%llx," -+ " end=0x%llx\n", ret, dmap->start, dmap->end); -+ return ret; -+ } -+ -+ ret = invalidate_inode_pages2_range(inode->i_mapping, -+ dmap->start >> PAGE_SHIFT, -+ dmap->end >> PAGE_SHIFT); -+ /* TODO: What to do if above fails? For now, -+ * leave the range in place. 
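-+	 * Returning the error keeps the range in the interval tree and
-+	 * on the busy list, so a later reclaim pass can retry it.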
-+ */
-+	if (ret) {
-+		printk("invalidate_inode_pages2_range() failed err=%d\n", ret);
-+		return ret;
-+	}
-+
-+	/* Remove dax mapping from inode interval tree now */
-+	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
-+	fi->nr_dmaps--;
-+	return 0;
-+}
-+
-+/* Find the first mapping in the tree and free it. */
-+struct fuse_dax_mapping *fuse_dax_reclaim_first_mapping_locked(
-+				struct fuse_conn *fc, struct inode *inode)
-+{
-+	struct fuse_inode *fi = get_fuse_inode(inode);
-+	struct fuse_dax_mapping *dmap;
-+	int ret;
-+
-+	/* Find the first fuse dax mapping in the file. */
-+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0, -1);
-+	if (!dmap)
-+		return NULL;
-+
-+	ret = fuse_dax_reclaim_dmap_locked(fc, inode, dmap);
-+	if (ret < 0)
-+		return ERR_PTR(ret);
-+
-+	/* Clean up dmap. Do not add back to free list */
-+	dmap_remove_busy_list(fc, dmap);
-+	dmap->inode = NULL;
-+	dmap->start = dmap->end = 0;
-+
-+	pr_debug("fuse: reclaimed memory range window_offset=0x%llx,"
-+		 " length=0x%llx\n", dmap->window_offset,
-+		 dmap->length);
-+	return dmap;
-+}
-+
-+/*
-+ * Find the first mapping in the tree, free it and return it. Do not add
-+ * it back to the free pool.
-+ *
-+ * This is called with the inode lock held.
-+ */
-+struct fuse_dax_mapping *fuse_dax_reclaim_first_mapping(struct fuse_conn *fc,
-+					struct inode *inode)
-+{
-+	struct fuse_inode *fi = get_fuse_inode(inode);
-+	struct fuse_dax_mapping *dmap;
-+
-+	down_write(&fi->i_mmap_sem);
-+	down_write(&fi->i_dmap_sem);
-+	dmap = fuse_dax_reclaim_first_mapping_locked(fc, inode);
-+	up_write(&fi->i_dmap_sem);
-+	up_write(&fi->i_mmap_sem);
-+	return dmap;
-+}
-+
-+static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
-+					struct inode *inode)
-+{
-+	struct fuse_dax_mapping *dmap;
-+	struct fuse_inode *fi = get_fuse_inode(inode);
-+
-+	while (1) {
-+		dmap = alloc_dax_mapping(fc);
-+		if (dmap)
-+			return dmap;
-+
-+		if (fi->nr_dmaps)
-+			return fuse_dax_reclaim_first_mapping(fc, inode);
-+		/*
-+		 * There are no mappings which can be reclaimed.
-+		 * Wait for one.
-+		 */
-+		if (!(fc->nr_free_ranges > 0)) {
-+			if (wait_event_killable_exclusive(fc->dax_range_waitq,
-+					(fc->nr_free_ranges > 0)))
-+				return ERR_PTR(-EINTR);
-+		}
-+	}
-+}
-+
-+int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
-+				     u64 dmap_start)
-+{
-+	int ret;
-+	struct fuse_inode *fi = get_fuse_inode(inode);
-+	struct fuse_dax_mapping *dmap;
-+
-+	WARN_ON(!inode_is_locked(inode));
-+
-+	/* Find the fuse dax mapping at file offset dmap_start. */
-+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, dmap_start,
-+						 dmap_start);
-+
-+	/* Range already got cleaned up by somebody else */
-+	if (!dmap)
-+		return 0;
-+
-+	ret = fuse_dax_reclaim_dmap_locked(fc, inode, dmap);
-+	if (ret < 0)
-+		return ret;
-+
-+	/* Clean up dmap entry and add it back to the free list */
-+	spin_lock(&fc->lock);
-+	__dmap_remove_busy_list(fc, dmap);
-+	dmap->inode = NULL;
-+	dmap->start = dmap->end = 0;
-+	__free_dax_mapping(fc, dmap);
-+	spin_unlock(&fc->lock);
-+
-+	pr_debug("fuse: freed memory range window_offset=0x%llx,"
-+		 " length=0x%llx\n", dmap->window_offset,
-+		 dmap->length);
-+	return ret;
-+}
-+
-+/*
-+ * Free a range of memory.
-+ * Locking:
-+ * 1. Take inode->i_rwsem to prevent further read/write.
-+ * 2. Take fuse_inode->i_mmap_sem to block dax faults.
-+ * 3. Take fuse_inode->i_dmap_sem to protect the interval tree. It might not
-+ *    be strictly necessary as locks 1 and 2 seem sufficient. 
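-+ *
-+ * The inode lock is taken with trylock in fuse_dax_free_one_mapping()
-+ * below, so the reclaim worker cannot deadlock against a task that
-+ * holds the lock while waiting for a free range.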
-+ */
-+int fuse_dax_free_one_mapping(struct fuse_conn *fc, struct inode *inode,
-+			      u64 dmap_start)
-+{
-+	int ret;
-+	struct fuse_inode *fi = get_fuse_inode(inode);
-+
-+	/*
-+	 * If the process is blocked waiting for memory while holding the
-+	 * inode lock, we will deadlock. So continue to free the next range.
-+	 */
-+	if (!inode_trylock(inode))
-+		return -EAGAIN;
-+	down_write(&fi->i_mmap_sem);
-+	down_write(&fi->i_dmap_sem);
-+	ret = fuse_dax_free_one_mapping_locked(fc, inode, dmap_start);
-+	up_write(&fi->i_dmap_sem);
-+	up_write(&fi->i_mmap_sem);
-+	inode_unlock(inode);
-+	return ret;
-+}
-+
-+int fuse_dax_free_memory(struct fuse_conn *fc, unsigned long nr_to_free)
-+{
-+	struct fuse_dax_mapping *dmap, *pos, *temp;
-+	int ret, nr_freed = 0, nr_eagain = 0;
-+	u64 dmap_start = 0, window_offset = 0;
-+	struct inode *inode = NULL;
-+
-+	/* Pick the first busy range and free it for now */
-+	while (1) {
-+		if (nr_freed >= nr_to_free)
-+			break;
-+
-+		if (nr_eagain > 20) {
-+			queue_delayed_work(system_long_wq, &fc->dax_free_work,
-+					   msecs_to_jiffies(10));
-+			return 0;
-+		}
-+
-+		dmap = NULL;
-+		spin_lock(&fc->lock);
-+
-+		list_for_each_entry_safe(pos, temp, &fc->busy_ranges,
-+					 busy_list) {
-+			inode = igrab(pos->inode);
-+			/*
-+			 * This inode is going away. That will free
-+			 * up all the ranges anyway, continue to
-+			 * next range.
-+			 */
-+			if (!inode)
-+				continue;
-+			/*
-+			 * Take this element off the list and add it to the
-+			 * tail. If the inode lock can't be obtained, this
-+			 * helps with selecting a new element next time.
-+			 */
-+			dmap = pos;
-+			list_move_tail(&dmap->busy_list, &fc->busy_ranges);
-+			dmap_start = dmap->start;
-+			window_offset = dmap->window_offset;
-+			break;
-+		}
-+		spin_unlock(&fc->lock);
-+		if (!dmap)
-+			return 0;
-+
-+		ret = fuse_dax_free_one_mapping(fc, inode, dmap_start);
-+		iput(inode);
-+		if (ret && ret != -EAGAIN) {
-+			printk("%s(window_offset=0x%llx) failed. err=%d\n",
-+			       __func__, window_offset, ret);
-+			return ret;
-+		}
-+
-+		/* Could not get the inode lock. Try the next element */
-+		if (ret == -EAGAIN) {
-+			nr_eagain++;
-+			continue;
-+		}
-+		nr_freed++;
-+	}
-+	return 0;
-+}
-+
-+/* TODO: This probably should go in inode.c */
-+void fuse_dax_free_mem_worker(struct work_struct *work)
-+{
-+	int ret;
-+	struct fuse_conn *fc = container_of(work, struct fuse_conn,
-+					    dax_free_work.work);
-+	pr_debug("fuse: Worker to free memory called. nr_free_ranges=%lu"
-+		 " nr_busy_ranges=%lu\n", fc->nr_free_ranges,
-+		 fc->nr_busy_ranges);
-+	ret = fuse_dax_free_memory(fc, FUSE_DAX_RECLAIM_CHUNK);
-+	if (ret)
-+		pr_debug("fuse: fuse_dax_free_memory() failed with err=%d\n", ret);
- }
-diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
-index cec8b8e74..1149281ab 100644
---- a/fs/fuse/fuse_i.h
-+++ b/fs/fuse/fuse_i.h
-@@ -43,6 +43,20 @@
- /** Number of page pointers embedded in fuse_req */
- #define FUSE_REQ_INLINE_PAGES 1
-
-+/* Default memory range size, 2MB */
-+#define FUSE_DAX_MEM_RANGE_SZ	(2*1024*1024)
-+#define FUSE_DAX_MEM_RANGE_PAGES (FUSE_DAX_MEM_RANGE_SZ/PAGE_SIZE)
-+
-+/* Number of ranges reclaimer will try to free in one invocation */
-+#define FUSE_DAX_RECLAIM_CHUNK	(10)
-+
-+/*
-+ * Dax memory reclaim threshold in percentage of total ranges. When the
-+ * number of free ranges drops below this threshold, reclaim can trigger.
-+ * Default is 20%.
-+ */
-+#define FUSE_DAX_RECLAIM_THRESHOLD	(20)
-+
- /** List of active connections */
- extern struct list_head fuse_conn_list;
-
-@@ -53,12 +67,73 @@ extern struct mutex fuse_mutex;
- extern unsigned max_user_bgreq;
- extern unsigned max_user_congthresh;
-
-+/** Mount options */
-+struct fuse_mount_data {
-+	int fd;
-+	const char *tag; /* lifetime: .fill_super() data argument */
-+	unsigned rootmode;
-+	kuid_t user_id;
-+	kgid_t group_id;
-+	unsigned fd_present:1;
-+	unsigned tag_present:1;
-+	unsigned rootmode_present:1;
-+	unsigned user_id_present:1;
-+	unsigned group_id_present:1;
-+	unsigned default_permissions:1;
-+	unsigned allow_other:1;
-+	unsigned dax:1;
-+	unsigned destroy:1;
-+	unsigned max_read;
-+	unsigned blksize;
-+
-+	/* DAX device, may be NULL */
-+	struct dax_device *dax_dev;
-+
-+	/* fuse input queue operations */
-+	const struct fuse_iqueue_ops *fiq_ops;
-+
-+	/* device-specific state for fuse_iqueue */
-+	void *fiq_priv;
-+
-+	/* fuse_dev pointer to fill in, should contain NULL on entry */
-+	void **fudptr;
-+};
-+
- /* One forget request */
- struct fuse_forget_link {
- 	struct fuse_forget_one forget_one;
- 	struct fuse_forget_link *next;
- };
-
-+#define START(node) ((node)->start)
-+#define LAST(node) ((node)->end)
-+
-+/** Translation information for file offsets to DAX window offsets */
-+struct fuse_dax_mapping {
-+	/* Pointer to inode where this memory range is mapped */
-+	struct inode *inode;
-+
-+	/* Will connect in fc->free_ranges to keep track of free memory */
-+	struct list_head list;
-+
-+	/* For interval tree in file/inode */
-+	struct rb_node rb;
-+	/** Start position in file */
-+	__u64 start;
-+	/** End position in file */
-+	__u64 end;
-+	__u64 __subtree_last;
-+
-+	/* Will connect in fc->busy_ranges to keep track of busy memory */
-+	struct list_head busy_list;
-+
-+	/** Position in DAX window */
-+	u64 window_offset;
-+
-+	/** Length of mapping, in bytes */
-+	loff_t length;
-+};
-+
- /** FUSE inode */
- struct fuse_inode {
- 	/** Inode data */
-@@ -108,6 +183,22 @@ struct fuse_inode {
-
- 	/** Lock for serializing lookup and readdir for back compatibility*/
- 	struct mutex mutex;
-+
-+	/*
-+	 * Semaphore to protect modifications to dmap_tree
-+	 */
-+	struct rw_semaphore i_dmap_sem;
-+
-+	/**
-+	 * Can't take the inode lock in the fault path (leads to a circular
-+	 * dependency). So take this in the fuse dax fault path to make sure
-+	 * truncate and punch hole etc. can't make progress in parallel.
-+	 */
-+	struct rw_semaphore i_mmap_sem;
-+
-+	/** Sorted rb tree of struct fuse_dax_mapping elements */
-+	struct rb_root_cached dmap_tree;
-+	unsigned long nr_dmaps;
- };
-
- /** FUSE inode state bits */
-@@ -382,8 +473,44 @@ struct fuse_req {
-
- 	/** Request is stolen from fuse_file->reserved_req */
- 	struct file *stolen_file;
-+
-+	/** virtio-fs's physically contiguous buffer for in and out args */
-+	void *argbuf;
- };
-
-+struct fuse_iqueue;
-+
-+/**
-+ * Input queue callbacks
-+ *
-+ * Input queue signalling is device-specific. For example, the /dev/fuse file
-+ * uses fiq->waitq and fasync to wake processes that are waiting on queue
-+ * readiness. These callbacks allow other device types to respond to input
-+ * queue activity. 
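-+ *
-+ * Each callback is invoked with fiq->waitq.lock held and must release
-+ * it before returning, as the __releases() annotations below document.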
-+ */
-+struct fuse_iqueue_ops {
-+	/**
-+	 * Signal that a forget has been queued
-+	 */
-+	void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
-+		__releases(fiq->waitq.lock);
-+
-+	/**
-+	 * Signal that an INTERRUPT request has been queued
-+	 */
-+	void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
-+		__releases(fiq->waitq.lock);
-+
-+	/**
-+	 * Signal that a request has been queued
-+	 */
-+	void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
-+		__releases(fiq->waitq.lock);
-+};
-+
-+/** /dev/fuse input queue operations */
-+extern const struct fuse_iqueue_ops fuse_dev_fiq_ops;
-+
- struct fuse_iqueue {
- 	/** Connection established */
- 	unsigned connected;
-@@ -409,6 +536,12 @@ struct fuse_iqueue {
-
- 	/** O_ASYNC requests */
- 	struct fasync_struct *fasync;
-+
-+	/** Device-specific callbacks */
-+	const struct fuse_iqueue_ops *ops;
-+
-+	/** Device-specific state */
-+	void *priv;
- };
-
- struct fuse_pqueue {
-@@ -675,6 +808,28 @@ struct fuse_conn {
-
- 	/** List of device instances belonging to this connection */
- 	struct list_head devices;
-+
-+	/** DAX device, non-NULL if DAX is supported */
-+	struct dax_device *dax_dev;
-+
-+	/* List of memory ranges which are busy */
-+	unsigned long nr_busy_ranges;
-+	struct list_head busy_ranges;
-+
-+	/* Worker to free up memory ranges */
-+	struct delayed_work dax_free_work;
-+
-+	/* Wait queue for a dax range to become free */
-+	wait_queue_head_t dax_range_waitq;
-+
-+	/*
-+	 * DAX Window Free Ranges. TODO: This might not be the best place to
-+	 * store this free list.
-+	 */
-+	unsigned long nr_free_ranges;
-+	struct list_head free_ranges;
-+
-+	unsigned long nr_ranges;
- };
-
- static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
-@@ -860,6 +1015,11 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
- void fuse_request_send_background_locked(struct fuse_conn *fc,
- 					 struct fuse_req *req);
-
-+/**
-+ * End a finished request
-+ */
-+void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req);
-+
- /* Abort all requests */
- void fuse_abort_conn(struct fuse_conn *fc, bool is_abort);
- void fuse_wait_aborted(struct fuse_conn *fc);
-@@ -881,16 +1041,42 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
- /**
-  * Initialize fuse_conn
-  */
--void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
-+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
-+		    struct dax_device *dax_dev,
-+		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
-
- /**
-  * Release reference to fuse_conn
-  */
- void fuse_conn_put(struct fuse_conn *fc);
-
--struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc);
-+struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
-+struct fuse_dev *fuse_dev_alloc(void);
-+void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
- void fuse_dev_free(struct fuse_dev *fud);
-
-+/**
-+ * Parse a mount options string
-+ */
-+int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
-+		   struct user_namespace *user_ns);
-+
-+/**
-+ * Fill in superblock and initialize fuse connection
-+ * @sb: partially-initialized superblock to fill in
-+ * @mount_data: mount parameters
-+ */
-+int fuse_fill_super_common(struct super_block *sb,
-+			   struct fuse_mount_data *mount_data);
-+void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req);
-+
-+/**
-+ * Disassociate fuse connection from superblock and kill the superblock
-+ *
-+ * Calls kill_anon_super(); do not use with bdev mounts. 
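-+ *
-+ * Exported so that virtio-fs, whose mounts are identified by a tag
-+ * rather than a block device, can reuse it as its ->kill_sb().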
-+ */ -+void fuse_kill_sb_anon(struct super_block *sb); -+ - /** - * Add connection to control filesystem - */ -@@ -992,4 +1178,16 @@ struct posix_acl; - struct posix_acl *fuse_get_acl(struct inode *inode, int type); - int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type); - -+/** -+ * Return the number of bytes in an arguments list -+ */ -+unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args); -+ -+/** -+ * Get the next unique ID for a request -+ */ -+u64 fuse_get_unique(struct fuse_iqueue *fiq); -+void fuse_dax_free_mem_worker(struct work_struct *work); -+void fuse_removemapping(struct inode *inode); -+ - #endif /* _FS_FUSE_I_H */ -diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c -index db9e60b7e..dd16c7f6a 100644 ---- a/fs/fuse/inode.c -+++ b/fs/fuse/inode.c -@@ -22,6 +22,8 @@ - #include - #include - #include -+#include -+#include - - MODULE_AUTHOR("Miklos Szeredi "); - MODULE_DESCRIPTION("Filesystem in Userspace"); -@@ -59,21 +61,6 @@ MODULE_PARM_DESC(max_user_congthresh, - /** Congestion starts at 75% of maximum */ - #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) - --struct fuse_mount_data { -- int fd; -- unsigned rootmode; -- kuid_t user_id; -- kgid_t group_id; -- unsigned fd_present:1; -- unsigned rootmode_present:1; -- unsigned user_id_present:1; -- unsigned group_id_present:1; -- unsigned default_permissions:1; -- unsigned allow_other:1; -- unsigned max_read; -- unsigned blksize; --}; -- - struct fuse_forget_link *fuse_alloc_forget(void) - { - return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); -@@ -96,11 +83,14 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) - fi->writectr = 0; - fi->orig_ino = 0; - fi->state = 0; -+ fi->nr_dmaps = 0; - INIT_LIST_HEAD(&fi->write_files); - INIT_LIST_HEAD(&fi->queued_writes); - INIT_LIST_HEAD(&fi->writepages); - init_waitqueue_head(&fi->page_waitq); - mutex_init(&fi->mutex); -+ init_rwsem(&fi->i_mmap_sem); -+ init_rwsem(&fi->i_dmap_sem); - fi->forget = fuse_alloc_forget(); - if (!fi->forget) { - kmem_cache_free(fuse_inode_cachep, inode); -@@ -133,6 +123,10 @@ static void fuse_evict_inode(struct inode *inode) - if (inode->i_sb->s_flags & SB_ACTIVE) { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); -+ if (IS_DAX(inode)) { -+ fuse_removemapping(inode); -+ WARN_ON(fi->nr_dmaps); -+ } - fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); - fi->forget = NULL; - } -@@ -447,6 +441,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) - - enum { - OPT_FD, -+ OPT_TAG, - OPT_ROOTMODE, - OPT_USER_ID, - OPT_GROUP_ID, -@@ -454,11 +449,13 @@ enum { - OPT_ALLOW_OTHER, - OPT_MAX_READ, - OPT_BLKSIZE, -+ OPT_DAX, - OPT_ERR - }; - - static const match_table_t tokens = { - {OPT_FD, "fd=%u"}, -+ {OPT_TAG, "tag=%s"}, - {OPT_ROOTMODE, "rootmode=%o"}, - {OPT_USER_ID, "user_id=%u"}, - {OPT_GROUP_ID, "group_id=%u"}, -@@ -466,6 +463,7 @@ static const match_table_t tokens = { - {OPT_ALLOW_OTHER, "allow_other"}, - {OPT_MAX_READ, "max_read=%u"}, - {OPT_BLKSIZE, "blksize=%u"}, -+ {OPT_DAX, "dax"}, - {OPT_ERR, NULL} - }; - -@@ -480,7 +478,7 @@ static int fuse_match_uint(substring_t *s, unsigned int *res) - return err; - } - --static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev, -+int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev, - struct user_namespace *user_ns) - { - char *p; -@@ -505,6 +503,11 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev, - 
d->fd_present = 1; - break; - -+ case OPT_TAG: -+ d->tag = args[0].from; -+ d->tag_present = 1; -+ break; -+ - case OPT_ROOTMODE: - if (match_octal(&args[0], &value)) - return 0; -@@ -552,17 +555,22 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev, - d->blksize = value; - break; - -+ case OPT_DAX: -+ d->dax = 1; -+ break; -+ - default: - return 0; - } - } - -- if (!d->fd_present || !d->rootmode_present || -- !d->user_id_present || !d->group_id_present) -+ if (!d->rootmode_present || !d->user_id_present || -+ !d->group_id_present) - return 0; - - return 1; - } -+EXPORT_SYMBOL_GPL(parse_fuse_opt); - - static int fuse_show_options(struct seq_file *m, struct dentry *root) - { -@@ -579,10 +587,14 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) - seq_printf(m, ",max_read=%u", fc->max_read); - if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) - seq_printf(m, ",blksize=%lu", sb->s_blocksize); -+ if (fc->dax_dev) -+ seq_printf(m, ",dax"); - return 0; - } - --static void fuse_iqueue_init(struct fuse_iqueue *fiq) -+static void fuse_iqueue_init(struct fuse_iqueue *fiq, -+ const struct fuse_iqueue_ops *ops, -+ void *priv) - { - memset(fiq, 0, sizeof(struct fuse_iqueue)); - init_waitqueue_head(&fiq->waitq); -@@ -590,6 +602,8 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq) - INIT_LIST_HEAD(&fiq->interrupts); - fiq->forget_list_tail = &fiq->forget_list_head; - fiq->connected = 1; -+ fiq->ops = ops; -+ fiq->priv = priv; - } - - static void fuse_pqueue_init(struct fuse_pqueue *fpq) -@@ -601,7 +615,84 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq) - fpq->connected = 1; - } - --void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) -+static void fuse_free_dax_mem_ranges(struct list_head *mem_list) -+{ -+ struct fuse_dax_mapping *range, *temp; -+ -+ /* Free All allocated elements */ -+ list_for_each_entry_safe(range, temp, mem_list, list) { -+ list_del(&range->list); -+ if (!list_empty(&range->busy_list)) -+ list_del(&range->busy_list); -+ kfree(range); -+ } -+} -+ -+#ifdef CONFIG_FS_DAX -+static int fuse_dax_mem_range_init(struct fuse_conn *fc, -+ struct dax_device *dax_dev) -+{ -+ long nr_pages, nr_ranges; -+ void *kaddr; -+ pfn_t pfn; -+ struct fuse_dax_mapping *range; -+ LIST_HEAD(mem_ranges); -+ phys_addr_t phys_addr; -+ int ret = 0, id; -+ size_t dax_size = -1; -+ unsigned long allocated_ranges = 0, i; -+ -+ id = dax_read_lock(); -+ nr_pages = dax_direct_access(dax_dev, 0, PHYS_PFN(dax_size), &kaddr, -+ &pfn); -+ dax_read_unlock(id); -+ if (nr_pages < 0) { -+ pr_debug("dax_direct_access() returned %ld\n", nr_pages); -+ return nr_pages; -+ } -+ -+ phys_addr = pfn_t_to_phys(pfn); -+ nr_ranges = nr_pages/FUSE_DAX_MEM_RANGE_PAGES; -+ printk("fuse_dax_mem_range_init(): dax mapped %ld pages. nr_ranges=%ld\n", nr_pages, nr_ranges); -+ -+ for (i = 0; i < nr_ranges; i++) { -+ range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); -+ if (!range) { -+ pr_debug("memory allocation for mem_range failed.\n"); -+ ret = -ENOMEM; -+ goto out_err; -+ } -+ /* TODO: This offset only works if virtio-fs driver is not -+ * having some memory hidden at the beginning. 
This needs -+ * better handling -+ */ -+ range->window_offset = i * FUSE_DAX_MEM_RANGE_SZ; -+ range->length = FUSE_DAX_MEM_RANGE_SZ; -+ list_add_tail(&range->list, &mem_ranges); -+ INIT_LIST_HEAD(&range->busy_list); -+ allocated_ranges++; -+ } -+ -+ list_replace_init(&mem_ranges, &fc->free_ranges); -+ fc->nr_free_ranges = allocated_ranges; -+ fc->nr_ranges = allocated_ranges; -+ return 0; -+out_err: -+ /* Free All allocated elements */ -+ fuse_free_dax_mem_ranges(&mem_ranges); -+ return ret; -+} -+#else /* !CONFIG_FS_DAX */ -+static inline int fuse_dax_mem_range_init(struct fuse_conn *fc, -+ struct dax_device *dax_dev) -+{ -+ return 0; -+} -+#endif /* CONFIG_FS_DAX */ -+ -+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, -+ struct dax_device *dax_dev, -+ const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) - { - memset(fc, 0, sizeof(*fc)); - spin_lock_init(&fc->lock); -@@ -610,7 +701,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) - atomic_set(&fc->dev_count, 1); - init_waitqueue_head(&fc->blocked_waitq); - init_waitqueue_head(&fc->reserved_req_waitq); -- fuse_iqueue_init(&fc->iq); -+ init_waitqueue_head(&fc->dax_range_waitq); -+ fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); - INIT_LIST_HEAD(&fc->bg_queue); - INIT_LIST_HEAD(&fc->entry); - INIT_LIST_HEAD(&fc->devices); -@@ -625,7 +717,11 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) - fc->attr_version = 1; - get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); - fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); -+ fc->dax_dev = dax_dev; - fc->user_ns = get_user_ns(user_ns); -+ INIT_LIST_HEAD(&fc->free_ranges); -+ INIT_LIST_HEAD(&fc->busy_ranges); -+ INIT_DELAYED_WORK(&fc->dax_free_work, fuse_dax_free_mem_worker); - } - EXPORT_SYMBOL_GPL(fuse_conn_init); - -@@ -634,6 +730,9 @@ void fuse_conn_put(struct fuse_conn *fc) - if (refcount_dec_and_test(&fc->count)) { - if (fc->destroy_req) - fuse_request_free(fc->destroy_req); -+ flush_delayed_work(&fc->dax_free_work); -+ if (fc->dax_dev) -+ fuse_free_dax_mem_ranges(&fc->free_ranges); - put_pid_ns(fc->pid_ns); - put_user_ns(fc->user_ns); - fc->release(fc); -@@ -943,7 +1042,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) - wake_up_all(&fc->blocked_waitq); - } - --static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) -+void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) - { - struct fuse_init_in *arg = &req->misc.init_in; - -@@ -972,6 +1071,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) - req->end = process_init_reply; - fuse_request_send_background(fc, req); - } -+EXPORT_SYMBOL_GPL(fuse_send_init); - - static void fuse_free_conn(struct fuse_conn *fc) - { -@@ -1019,24 +1119,38 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) - return 0; - } - --struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) --{ -+struct fuse_dev *fuse_dev_alloc(void) { - struct fuse_dev *fud; - - fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); -- if (fud) { -- fud->fc = fuse_conn_get(fc); -+ if (fud) - fuse_pqueue_init(&fud->pq); - -- spin_lock(&fc->lock); -- list_add_tail(&fud->entry, &fc->devices); -- spin_unlock(&fc->lock); -- } -- - return fud; - } - EXPORT_SYMBOL_GPL(fuse_dev_alloc); - -+void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) { -+ fud->fc = fuse_conn_get(fc); -+ spin_lock(&fc->lock); -+ list_add_tail(&fud->entry, &fc->devices); -+ spin_unlock(&fc->lock); -+} 
-+EXPORT_SYMBOL_GPL(fuse_dev_install); -+ -+struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) -+{ -+ struct fuse_dev *fud; -+ -+ fud = fuse_dev_alloc(); -+ if (!fud) -+ return NULL; -+ -+ fuse_dev_install(fud, fc); -+ return fud; -+} -+EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); -+ - void fuse_dev_free(struct fuse_dev *fud) - { - struct fuse_conn *fc = fud->fc; -@@ -1052,15 +1166,13 @@ void fuse_dev_free(struct fuse_dev *fud) - } - EXPORT_SYMBOL_GPL(fuse_dev_free); - --static int fuse_fill_super(struct super_block *sb, void *data, int silent) -+int fuse_fill_super_common(struct super_block *sb, -+ struct fuse_mount_data *mount_data) - { - struct fuse_dev *fud; - struct fuse_conn *fc; - struct inode *root; -- struct fuse_mount_data d; -- struct file *file; - struct dentry *root_dentry; -- struct fuse_req *init_req; - int err; - int is_bdev = sb->s_bdev != NULL; - -@@ -1070,13 +1182,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) - - sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); - -- if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns)) -- goto err; -- - if (is_bdev) { - #ifdef CONFIG_BLOCK - err = -EINVAL; -- if (!sb_set_blocksize(sb, d.blksize)) -+ if (!sb_set_blocksize(sb, mount_data->blksize)) - goto err; - #endif - } else { -@@ -1093,19 +1202,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) - if (sb->s_user_ns != &init_user_ns) - sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; - -- file = fget(d.fd); -- err = -EINVAL; -- if (!file) -- goto err; -- -- /* -- * Require mount to happen from the same user namespace which -- * opened /dev/fuse to prevent potential attacks. -- */ -- if (file->f_op != &fuse_dev_operations || -- file->f_cred->user_ns != sb->s_user_ns) -- goto err_fput; -- - /* - * If we are not in the initial user namespace posix - * acls must be translated. 
-@@ -1116,12 +1212,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) - fc = kmalloc(sizeof(*fc), GFP_KERNEL); - err = -ENOMEM; - if (!fc) -- goto err_fput; -+ goto err; - -- fuse_conn_init(fc, sb->s_user_ns); -+ fuse_conn_init(fc, sb->s_user_ns, mount_data->dax_dev, -+ mount_data->fiq_ops, mount_data->fiq_priv); - fc->release = fuse_free_conn; - -- fud = fuse_dev_alloc(fc); -+ if (mount_data->dax_dev) { -+ err = fuse_dax_mem_range_init(fc, mount_data->dax_dev); -+ if (err) { -+ pr_debug("fuse_dax_mem_range_init() returned %d\n", err); -+ goto err_free_ranges; -+ } -+ } -+ -+ fud = fuse_dev_alloc_install(fc); - if (!fud) - goto err_put_conn; - -@@ -1136,17 +1241,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) - fc->dont_mask = 1; - sb->s_flags |= SB_POSIXACL; - -- fc->default_permissions = d.default_permissions; -- fc->allow_other = d.allow_other; -- fc->user_id = d.user_id; -- fc->group_id = d.group_id; -- fc->max_read = max_t(unsigned, 4096, d.max_read); -+ fc->default_permissions = mount_data->default_permissions; -+ fc->allow_other = mount_data->allow_other; -+ fc->user_id = mount_data->user_id; -+ fc->group_id = mount_data->group_id; -+ fc->max_read = max_t(unsigned, 4096, mount_data->max_read); - - /* Used by get_root_inode() */ - sb->s_fs_info = fc; - - err = -ENOMEM; -- root = fuse_get_root_inode(sb, d.rootmode); -+ root = fuse_get_root_inode(sb, mount_data->rootmode); - sb->s_d_op = &fuse_root_dentry_operations; - root_dentry = d_make_root(root); - if (!root_dentry) -@@ -1154,20 +1259,15 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) - /* Root dentry doesn't have .d_revalidate */ - sb->s_d_op = &fuse_dentry_operations; - -- init_req = fuse_request_alloc(0); -- if (!init_req) -- goto err_put_root; -- __set_bit(FR_BACKGROUND, &init_req->flags); -- -- if (is_bdev) { -+ if (mount_data->destroy) { - fc->destroy_req = fuse_request_alloc(0); - if (!fc->destroy_req) -- goto err_free_init_req; -+ goto err_put_root; - } - - mutex_lock(&fuse_mutex); - err = -EINVAL; -- if (file->private_data) -+ if (*mount_data->fudptr) - goto err_unlock; - - err = fuse_ctl_add_conn(fc); -@@ -1176,35 +1276,82 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) - - list_add_tail(&fc->entry, &fuse_conn_list); - sb->s_root = root_dentry; -- file->private_data = fud; -+ *mount_data->fudptr = fud; - mutex_unlock(&fuse_mutex); -- /* -- * atomic_dec_and_test() in fput() provides the necessary -- * memory barrier for file->private_data to be visible on all -- * CPUs after this -- */ -- fput(file); -- -- fuse_send_init(fc, init_req); -- - return 0; - - err_unlock: - mutex_unlock(&fuse_mutex); -- err_free_init_req: -- fuse_request_free(init_req); - err_put_root: - dput(root_dentry); - err_dev_free: - fuse_dev_free(fud); -+ err_free_ranges: -+ if (mount_data->dax_dev) -+ fuse_free_dax_mem_ranges(&fc->free_ranges); - err_put_conn: - fuse_conn_put(fc); - sb->s_fs_info = NULL; -- err_fput: -- fput(file); - err: - return err; - } -+EXPORT_SYMBOL_GPL(fuse_fill_super_common); -+ -+static int fuse_fill_super(struct super_block *sb, void *data, int silent) -+{ -+ struct fuse_mount_data d; -+ struct file *file; -+ int is_bdev = sb->s_bdev != NULL; -+ int err; -+ struct fuse_req *init_req; -+ -+ err = -EINVAL; -+ if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns)) -+ goto err; -+ if (!d.fd_present || d.tag_present) -+ goto err; -+ -+ file = fget(d.fd); -+ if (!file) -+ goto err; -+ -+ /* -+ * Require mount 
to happen from the same user namespace which -+ * opened /dev/fuse to prevent potential attacks. -+ */ -+ if ((file->f_op != &fuse_dev_operations) || -+ (file->f_cred->user_ns != sb->s_user_ns)) -+ goto err_fput; -+ -+ init_req = fuse_request_alloc(0); -+ if (!init_req) -+ goto err_fput; -+ __set_bit(FR_BACKGROUND, &init_req->flags); -+ -+ d.dax_dev = NULL; -+ d.fiq_ops = &fuse_dev_fiq_ops; -+ d.fiq_priv = NULL; -+ d.fudptr = &file->private_data; -+ d.destroy = is_bdev; -+ err = fuse_fill_super_common(sb, &d); -+ if (err < 0) -+ goto err_free_init_req; -+ /* -+ * atomic_dec_and_test() in fput() provides the necessary -+ * memory barrier for file->private_data to be visible on all -+ * CPUs after this -+ */ -+ fput(file); -+ fuse_send_init(get_fuse_conn_super(sb), init_req); -+ return 0; -+ -+err_free_init_req: -+ fuse_request_free(init_req); -+err_fput: -+ fput(file); -+err: -+ return err; -+} - - static struct dentry *fuse_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, -@@ -1229,11 +1376,12 @@ static void fuse_sb_destroy(struct super_block *sb) - } - } - --static void fuse_kill_sb_anon(struct super_block *sb) -+void fuse_kill_sb_anon(struct super_block *sb) - { - fuse_sb_destroy(sb); - kill_anon_super(sb); - } -+EXPORT_SYMBOL_GPL(fuse_kill_sb_anon); - - static struct file_system_type fuse_fs_type = { - .owner = THIS_MODULE, -diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c -new file mode 100644 -index 000000000..a0a2cd1ce ---- /dev/null -+++ b/fs/fuse/virtio_fs.c -@@ -0,0 +1,1121 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * virtio-fs: Virtio Filesystem -+ * Copyright (C) 2018 Red Hat, Inc. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "fuse_i.h" -+ -+/* List of virtio-fs device instances and a lock for the list */ -+static DEFINE_MUTEX(virtio_fs_mutex); -+static LIST_HEAD(virtio_fs_instances); -+ -+enum { -+ VQ_HIPRIO, -+ VQ_REQUEST -+}; -+ -+/* Per-virtqueue state */ -+struct virtio_fs_vq { -+ struct virtqueue *vq; /* protected by fpq->lock */ -+ struct work_struct done_work; -+ struct list_head queued_reqs; -+ struct delayed_work dispatch_work; -+ struct fuse_dev *fud; -+ char name[24]; -+} ____cacheline_aligned_in_smp; -+ -+/* State needed for devm_memremap_pages(). This API is called on the -+ * underlying pci_dev instead of struct virtio_fs (layering violation). Since -+ * the memremap release function only gets called when the pci_dev is released, -+ * keep the associated state separate from struct virtio_fs (it has a different -+ * lifecycle from pci_dev). 
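-+ *
-+ * The percpu_ref and completion below let the release path wait until
-+ * all references to the DAX window pages have been dropped before the
-+ * mapping goes away.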
-+ */ -+struct virtio_fs_memremap_info { -+ struct dev_pagemap pgmap; -+ struct percpu_ref ref; -+ struct completion completion; -+}; -+ -+/* A virtio-fs device instance */ -+struct virtio_fs { -+ struct list_head list; /* on virtio_fs_instances */ -+ char *tag; -+ struct virtio_fs_vq *vqs; -+ unsigned nvqs; /* number of virtqueues */ -+ unsigned num_queues; /* number of request queues */ -+ struct dax_device *dax_dev; -+ -+ /* DAX memory window where file contents are mapped */ -+ void *window_kaddr; -+ phys_addr_t window_phys_addr; -+ size_t window_len; -+}; -+ -+struct virtio_fs_forget { -+ struct fuse_in_header ih; -+ struct fuse_forget_in arg; -+ /* This request can be temporarily queued on virt queue */ -+ struct list_head list; -+}; -+ -+static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) -+{ -+ struct virtio_fs *fs = vq->vdev->priv; -+ -+ return &fs->vqs[vq->index]; -+} -+ -+static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) -+{ -+ return &vq_to_fsvq(vq)->fud->pq; -+} -+ -+/* Add a new instance to the list or return -EEXIST if tag name exists*/ -+static int virtio_fs_add_instance(struct virtio_fs *fs) -+{ -+ struct virtio_fs *fs2; -+ bool duplicate = false; -+ -+ mutex_lock(&virtio_fs_mutex); -+ -+ list_for_each_entry(fs2, &virtio_fs_instances, list) { -+ if (strcmp(fs->tag, fs2->tag) == 0) -+ duplicate = true; -+ } -+ -+ if (!duplicate) -+ list_add_tail(&fs->list, &virtio_fs_instances); -+ -+ mutex_unlock(&virtio_fs_mutex); -+ -+ if (duplicate) -+ return -EEXIST; -+ return 0; -+} -+ -+/* Return the virtio_fs with a given tag, or NULL */ -+static struct virtio_fs *virtio_fs_find_instance(const char *tag) -+{ -+ struct virtio_fs *fs; -+ -+ mutex_lock(&virtio_fs_mutex); -+ -+ list_for_each_entry(fs, &virtio_fs_instances, list) { -+ if (strcmp(fs->tag, tag) == 0) -+ goto found; -+ } -+ -+ fs = NULL; /* not found */ -+ -+found: -+ mutex_unlock(&virtio_fs_mutex); -+ -+ return fs; -+} -+ -+static void virtio_fs_free_devs(struct virtio_fs *fs) -+{ -+ unsigned int i; -+ -+ /* TODO lock */ -+ -+ for (i = 0; i < fs->nvqs; i++) { -+ struct virtio_fs_vq *fsvq = &fs->vqs[i]; -+ -+ if (!fsvq->fud) -+ continue; -+ -+ flush_work(&fsvq->done_work); -+ flush_delayed_work(&fsvq->dispatch_work); -+ -+ fuse_dev_free(fsvq->fud); /* TODO need to quiesce/end_requests/decrement dev_count */ -+ fsvq->fud = NULL; -+ } -+} -+ -+/* Read filesystem name from virtio config into fs->tag (must kfree()). 
*/ -+static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) -+{ -+ char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; -+ char *end; -+ size_t len; -+ -+ virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), -+ &tag_buf, sizeof(tag_buf)); -+ end = memchr(tag_buf, '\0', sizeof(tag_buf)); -+ if (end == tag_buf) -+ return -EINVAL; /* empty tag */ -+ if (!end) -+ end = &tag_buf[sizeof(tag_buf)]; -+ -+ len = end - tag_buf; -+ fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); -+ if (!fs->tag) -+ return -ENOMEM; -+ memcpy(fs->tag, tag_buf, len); -+ fs->tag[len] = '\0'; -+ return 0; -+} -+ -+/* Work function for hiprio completion */ -+static void virtio_fs_hiprio_done_work(struct work_struct *work) -+{ -+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, -+ done_work); -+ struct fuse_pqueue *fpq = &fsvq->fud->pq; -+ struct virtqueue *vq = fsvq->vq; -+ -+ /* Free completed FUSE_FORGET requests */ -+ spin_lock(&fpq->lock); -+ do { -+ unsigned len; -+ void *req; -+ -+ virtqueue_disable_cb(vq); -+ -+ while ((req = virtqueue_get_buf(vq, &len)) != NULL) -+ kfree(req); -+ } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); -+ spin_unlock(&fpq->lock); -+} -+ -+static void virtio_fs_dummy_dispatch_work(struct work_struct *work) -+{ -+ return; -+} -+ -+static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) -+{ -+ struct virtio_fs_forget *forget; -+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, -+ dispatch_work.work); -+ struct fuse_pqueue *fpq = &fsvq->fud->pq; -+ struct virtqueue *vq = fsvq->vq; -+ struct scatterlist sg; -+ struct scatterlist *sgs[] = {&sg}; -+ bool notify; -+ int ret; -+ -+ pr_debug("worker virtio_fs_hiprio_dispatch_work() called.\n"); -+ while(1) { -+ spin_lock(&fpq->lock); -+ forget = list_first_entry_or_null(&fsvq->queued_reqs, -+ struct virtio_fs_forget, list); -+ if (!forget) { -+ spin_unlock(&fpq->lock); -+ return; -+ } -+ -+ list_del(&forget->list); -+ sg_init_one(&sg, forget, sizeof(*forget)); -+ -+ /* Enqueue the request */ -+ dev_dbg(&vq->vdev->dev, "%s\n", __func__); -+ ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); -+ if (ret < 0) { -+ if (ret == -ENOMEM || ret == -ENOSPC) { -+ pr_debug("virtio-fs: Could not queue FORGET:" -+ " err=%d. Will try later\n", ret); -+ list_add_tail(&forget->list, -+ &fsvq->queued_reqs); -+ schedule_delayed_work(&fsvq->dispatch_work, -+ msecs_to_jiffies(1)); -+ } else { -+ pr_debug("virtio-fs: Could not queue FORGET:" -+ " err=%d. 
Dropping it.\n", ret); -+ kfree(forget); -+ } -+ spin_unlock(&fpq->lock); -+ return; -+ } -+ -+ notify = virtqueue_kick_prepare(vq); -+ spin_unlock(&fpq->lock); -+ -+ if (notify) -+ virtqueue_notify(vq); -+ pr_debug("worker virtio_fs_hiprio_dispatch_work() dispatched one forget request.\n"); -+ } -+} -+ -+/* Allocate and copy args into req->argbuf */ -+static int copy_args_to_argbuf(struct fuse_req *req) -+{ -+ unsigned offset = 0; -+ unsigned num_in; -+ unsigned num_out; -+ unsigned len; -+ unsigned i; -+ -+ num_in = req->in.numargs - req->in.argpages; -+ num_out = req->out.numargs - req->out.argpages; -+ len = fuse_len_args(num_in, (struct fuse_arg *)req->in.args) + -+ fuse_len_args(num_out, req->out.args); -+ -+ req->argbuf = kmalloc(len, GFP_ATOMIC); -+ if (!req->argbuf) -+ return -ENOMEM; -+ -+ for (i = 0; i < num_in; i++) { -+ memcpy(req->argbuf + offset, -+ req->in.args[i].value, -+ req->in.args[i].size); -+ offset += req->in.args[i].size; -+ } -+ -+ return 0; -+} -+ -+/* Copy args out of and free req->argbuf */ -+static void copy_args_from_argbuf(struct fuse_req *req) -+{ -+ unsigned remaining; -+ unsigned offset; -+ unsigned num_in; -+ unsigned num_out; -+ unsigned i; -+ -+ remaining = req->out.h.len - sizeof(req->out.h); -+ num_in = req->in.numargs - req->in.argpages; -+ num_out = req->out.numargs - req->out.argpages; -+ offset = fuse_len_args(num_in, (struct fuse_arg *)req->in.args); -+ -+ for (i = 0; i < num_out; i++) { -+ unsigned argsize = req->out.args[i].size; -+ -+ if (req->out.argvar && -+ i == req->out.numargs - 1 && -+ argsize > remaining) { -+ argsize = remaining; -+ } -+ -+ memcpy(req->out.args[i].value, req->argbuf + offset, argsize); -+ offset += argsize; -+ -+ if (i != req->out.numargs - 1) -+ remaining -= argsize; -+ } -+ -+ /* Store the actual size of the variable-length arg */ -+ if (req->out.argvar) -+ req->out.args[req->out.numargs - 1].size = remaining; -+ -+ kfree(req->argbuf); -+ req->argbuf = NULL; -+} -+ -+/* Work function for request completion */ -+static void virtio_fs_requests_done_work(struct work_struct *work) -+{ -+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, -+ done_work); -+ struct fuse_pqueue *fpq = &fsvq->fud->pq; -+ struct fuse_conn *fc = fsvq->fud->fc; -+ struct virtqueue *vq = fsvq->vq; -+ struct fuse_req *req; -+ struct fuse_req *next; -+ LIST_HEAD(reqs); -+ -+ /* Collect completed requests off the virtqueue */ -+ spin_lock(&fpq->lock); -+ do { -+ unsigned len; -+ -+ virtqueue_disable_cb(vq); -+ -+ while ((req = virtqueue_get_buf(vq, &len)) != NULL) -+ list_move_tail(&req->list, &reqs); -+ } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); -+ spin_unlock(&fpq->lock); -+ -+ /* End requests */ -+ list_for_each_entry_safe(req, next, &reqs, list) { -+ /* TODO check unique */ -+ /* TODO fuse_len_args(out) against oh.len */ -+ -+ copy_args_from_argbuf(req); -+ -+ /* TODO zeroing? 
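(i.e. whether any unused tail of the argbuf should be cleared before the args are copied out; this is a reading of the TODO, not confirmed by the patch)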
*/ -+ -+ spin_lock(&fpq->lock); -+ clear_bit(FR_SENT, &req->flags); -+ list_del_init(&req->list); -+ spin_unlock(&fpq->lock); -+ -+ fuse_request_end(fc, req); -+ } -+} -+ -+/* Virtqueue interrupt handler */ -+static void virtio_fs_vq_done(struct virtqueue *vq) -+{ -+ struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); -+ -+ dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); -+ -+ schedule_work(&fsvq->done_work); -+} -+ -+/* Initialize virtqueues */ -+static int virtio_fs_setup_vqs(struct virtio_device *vdev, -+ struct virtio_fs *fs) -+{ -+ struct virtqueue **vqs; -+ vq_callback_t **callbacks; -+ const char **names; -+ unsigned i; -+ int ret; -+ -+ virtio_cread(vdev, struct virtio_fs_config, num_queues, -+ &fs->num_queues); -+ if (fs->num_queues == 0) -+ return -EINVAL; -+ -+ fs->nvqs = 1 + fs->num_queues; -+ -+ fs->vqs = devm_kcalloc(&vdev->dev, fs->nvqs, -+ sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); -+ if (!fs->vqs) -+ return -ENOMEM; -+ -+ vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); -+ callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), -+ GFP_KERNEL); -+ names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); -+ if (!vqs || !callbacks || !names) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ callbacks[VQ_HIPRIO] = virtio_fs_vq_done; -+ snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), -+ "hiprio"); -+ names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; -+ INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); -+ INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); -+ INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, -+ virtio_fs_hiprio_dispatch_work); -+ -+ /* Initialize the requests virtqueues */ -+ for (i = VQ_REQUEST; i < fs->nvqs; i++) { -+ INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); -+ INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, -+ virtio_fs_dummy_dispatch_work); -+ INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); -+ snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), -+ "requests.%u", i - VQ_REQUEST); -+ callbacks[i] = virtio_fs_vq_done; -+ names[i] = fs->vqs[i].name; -+ } -+ -+ ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); -+ if (ret < 0) -+ goto out; -+ -+ for (i = 0; i < fs->nvqs; i++) -+ fs->vqs[i].vq = vqs[i]; -+ -+out: -+ kfree(names); -+ kfree(callbacks); -+ kfree(vqs); -+ return ret; -+} -+ -+/* Free virtqueues (device must already be reset) */ -+static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, -+ struct virtio_fs *fs) -+{ -+ vdev->config->del_vqs(vdev); -+} -+ -+/* Map a window offset to a page frame number. The window offset will have -+ * been produced by .iomap_begin(), which maps a file offset to a window -+ * offset. -+ */ -+static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, -+ long nr_pages, void **kaddr, pfn_t *pfn) -+{ -+ struct virtio_fs *fs = dax_get_private(dax_dev); -+ phys_addr_t offset = PFN_PHYS(pgoff); -+ size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; -+ -+ pr_debug("virtio_fs_direct_access(): called. nr_pages=%ld max_nr_pages=%zu\n", nr_pages, max_nr_pages); -+ -+ if (kaddr) -+ *kaddr = fs->window_kaddr + offset; -+ if (pfn) -+ *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, -+ PFN_DEV | PFN_MAP); -+ return nr_pages > max_nr_pages ? 
max_nr_pages : nr_pages; -+} -+ -+static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, -+ pgoff_t pgoff, void *addr, -+ size_t bytes, struct iov_iter *i) -+{ -+ return copy_from_iter(addr, bytes, i); -+} -+ -+static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, -+ pgoff_t pgoff, void *addr, -+ size_t bytes, struct iov_iter *i) -+{ -+ return copy_to_iter(addr, bytes, i); -+} -+ -+static const struct dax_operations virtio_fs_dax_ops = { -+ .direct_access = virtio_fs_direct_access, -+ .copy_from_iter = virtio_fs_copy_from_iter, -+ .copy_to_iter = virtio_fs_copy_to_iter, -+}; -+ -+static void virtio_fs_percpu_release(struct percpu_ref *ref) -+{ -+ struct virtio_fs_memremap_info *mi = -+ container_of(ref, struct virtio_fs_memremap_info, ref); -+ -+ complete(&mi->completion); -+} -+ -+static void virtio_fs_percpu_exit(void *data) -+{ -+ struct virtio_fs_memremap_info *mi = data; -+ -+ wait_for_completion(&mi->completion); -+ percpu_ref_exit(&mi->ref); -+} -+ -+static void virtio_fs_percpu_kill(struct percpu_ref *ref) -+{ -+ percpu_ref_kill(ref); -+} -+ -+static void virtio_fs_cleanup_dax(void *data) -+{ -+ struct virtio_fs *fs = data; -+ -+ kill_dax(fs->dax_dev); -+ put_dax(fs->dax_dev); -+} -+ -+static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) -+{ -+ struct virtio_shm_region cache_reg; -+ struct virtio_fs_memremap_info *mi; -+ struct dev_pagemap *pgmap; -+ bool have_cache; -+ int ret; -+ -+ if (!IS_ENABLED(CONFIG_DAX_DRIVER)) -+ return 0; -+ -+ /* Get cache region */ -+ have_cache = virtio_get_shm_region(vdev, -+ &cache_reg, -+ (u8)VIRTIO_FS_SHMCAP_ID_CACHE); -+ if (!have_cache) { -+ dev_err(&vdev->dev, "%s: No cache capability\n", __func__); -+ return -ENXIO; -+ } else { -+ dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", -+ cache_reg.len, cache_reg.addr); -+ } -+ -+ mi = devm_kzalloc(&vdev->dev, sizeof(*mi), GFP_KERNEL); -+ if (!mi) -+ return -ENOMEM; -+ -+ init_completion(&mi->completion); -+ ret = percpu_ref_init(&mi->ref, virtio_fs_percpu_release, 0, -+ GFP_KERNEL); -+ if (ret < 0) { -+ dev_err(&vdev->dev, "%s: percpu_ref_init failed (%d)\n", -+ __func__, ret); -+ return ret; -+ } -+ -+ ret = devm_add_action(&vdev->dev, virtio_fs_percpu_exit, mi); -+ if (ret < 0) { -+ percpu_ref_exit(&mi->ref); -+ return ret; -+ } -+ -+ pgmap = &mi->pgmap; -+ pgmap->altmap_valid = false; -+ pgmap->ref = &mi->ref; -+ pgmap->kill = virtio_fs_percpu_kill; -+ pgmap->type = MEMORY_DEVICE_FS_DAX; -+ -+ /* Ideally we would directly use the PCI BAR resource but -+ * devm_memremap_pages() wants its own copy in pgmap. So -+ * initialize a struct resource from scratch (only the start -+ * and end fields will be used). 
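-+	 * devm_memremap_pages() also allocates struct pages for the
-+	 * window, which the fs/dax.c fault paths require.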
-+ */ -+ pgmap->res = (struct resource){ -+ .name = "virtio-fs dax window", -+ .start = (phys_addr_t) cache_reg.addr, -+ .end = (phys_addr_t) cache_reg.addr + cache_reg.len, -+ }; -+ -+ fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); -+ if (IS_ERR(fs->window_kaddr)) -+ return PTR_ERR(fs->window_kaddr); -+ -+ fs->window_phys_addr = (phys_addr_t) cache_reg.addr; -+ fs->window_len = (phys_addr_t) cache_reg.len; -+ -+ dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx" -+ " len 0x%llx\n", __func__, fs->window_kaddr, cache_reg.addr, -+ cache_reg.len); -+ -+ fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops); -+ if (!fs->dax_dev) -+ return -ENOMEM; -+ -+ return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, fs); -+} -+ -+static int virtio_fs_probe(struct virtio_device *vdev) -+{ -+ struct virtio_fs *fs; -+ int ret; -+ -+ fs = devm_kzalloc(&vdev->dev, sizeof(*fs), GFP_KERNEL); -+ if (!fs) -+ return -ENOMEM; -+ vdev->priv = fs; -+ -+ ret = virtio_fs_read_tag(vdev, fs); -+ if (ret < 0) -+ goto out; -+ -+ ret = virtio_fs_setup_vqs(vdev, fs); -+ if (ret < 0) -+ goto out; -+ -+ /* TODO vq affinity */ -+ /* TODO populate notifications vq */ -+ -+ ret = virtio_fs_setup_dax(vdev, fs); -+ if (ret < 0) -+ goto out_vqs; -+ -+ /* Bring the device online in case the filesystem is mounted and -+ * requests need to be sent before we return. -+ */ -+ virtio_device_ready(vdev); -+ -+ ret = virtio_fs_add_instance(fs); -+ if (ret < 0) -+ goto out_vqs; -+ -+ return 0; -+ -+out_vqs: -+ vdev->config->reset(vdev); -+ virtio_fs_cleanup_vqs(vdev, fs); -+out: -+ vdev->priv = NULL; -+ return ret; -+} -+ -+static void virtio_fs_remove(struct virtio_device *vdev) -+{ -+ struct virtio_fs *fs = vdev->priv; -+ -+ virtio_fs_free_devs(fs); -+ -+ vdev->config->reset(vdev); -+ virtio_fs_cleanup_vqs(vdev, fs); -+ -+ mutex_lock(&virtio_fs_mutex); -+ list_del(&fs->list); -+ mutex_unlock(&virtio_fs_mutex); -+ -+ vdev->priv = NULL; -+} -+ -+#ifdef CONFIG_PM -+static int virtio_fs_freeze(struct virtio_device *vdev) -+{ -+ return 0; /* TODO */ -+} -+ -+static int virtio_fs_restore(struct virtio_device *vdev) -+{ -+ return 0; /* TODO */ -+} -+#endif /* CONFIG_PM */ -+ -+const static struct virtio_device_id id_table[] = { -+ { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, -+ {}, -+}; -+ -+const static unsigned int feature_table[] = {}; -+ -+static struct virtio_driver virtio_fs_driver = { -+ .driver.name = KBUILD_MODNAME, -+ .driver.owner = THIS_MODULE, -+ .id_table = id_table, -+ .feature_table = feature_table, -+ .feature_table_size = ARRAY_SIZE(feature_table), -+ /* TODO validate config_get != NULL */ -+ .probe = virtio_fs_probe, -+ .remove = virtio_fs_remove, -+#ifdef CONFIG_PM_SLEEP -+ .freeze = virtio_fs_freeze, -+ .restore = virtio_fs_restore, -+#endif -+}; -+ -+static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) -+__releases(fiq->waitq.lock) -+{ -+ struct fuse_forget_link *link; -+ struct virtio_fs_forget *forget; -+ struct fuse_pqueue *fpq; -+ struct scatterlist sg; -+ struct scatterlist *sgs[] = {&sg}; -+ struct virtio_fs *fs; -+ struct virtqueue *vq; -+ struct virtio_fs_vq *fsvq; -+ bool notify; -+ u64 unique; -+ int ret; -+ -+ BUG_ON(!fiq->forget_list_head.next); -+ link = fiq->forget_list_head.next; -+ BUG_ON(link->next); -+ fiq->forget_list_head.next = NULL; -+ fiq->forget_list_tail = &fiq->forget_list_head; -+ -+ unique = fuse_get_unique(fiq); -+ -+ fs = fiq->priv; -+ fsvq = &fs->vqs[VQ_HIPRIO]; -+ spin_unlock(&fiq->waitq.lock); -+ -+ /* Allocate a buffer for the request */ -+ 
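/* GFP_ATOMIC keeps this path non-blocking; if the allocation fails
-+	 * the FORGET is simply dropped (see the TODO below) and the
-+	 * server misses one nlookup decrement for this nodeid.
-+	 */
-+	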
forget = kmalloc(sizeof(*forget), GFP_ATOMIC); -+ if (!forget) { -+ pr_err("virtio-fs: dropped FORGET: kmalloc failed\n"); -+ goto out; /* TODO avoid dropping it? */ -+ } -+ -+ forget->ih = (struct fuse_in_header){ -+ .opcode = FUSE_FORGET, -+ .nodeid = link->forget_one.nodeid, -+ .unique = unique, -+ .len = sizeof(*forget), -+ }; -+ forget->arg = (struct fuse_forget_in){ -+ .nlookup = link->forget_one.nlookup, -+ }; -+ -+ sg_init_one(&sg, forget, sizeof(*forget)); -+ -+ /* Enqueue the request */ -+ vq = fsvq->vq; -+ dev_dbg(&vq->vdev->dev, "%s\n", __func__); -+ fpq = vq_to_fpq(vq); -+ spin_lock(&fpq->lock); -+ -+ ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); -+ if (ret < 0) { -+ if (ret == -ENOMEM || ret == -ENOSPC) { -+ pr_debug("virtio-fs: Could not queue FORGET: err=%d." -+ " Will try later.\n", ret); -+ list_add_tail(&forget->list, &fsvq->queued_reqs); -+ schedule_delayed_work(&fsvq->dispatch_work, -+ msecs_to_jiffies(1)); -+ } else { -+ pr_debug("virtio-fs: Could not queue FORGET: err=%d." -+ " Dropping it.\n", ret); -+ kfree(forget); -+ } -+ spin_unlock(&fpq->lock); -+ goto out; -+ } -+ -+ notify = virtqueue_kick_prepare(vq); -+ -+ spin_unlock(&fpq->lock); -+ -+ if (notify) -+ virtqueue_notify(vq); -+out: -+ kfree(link); -+} -+ -+static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) -+__releases(fiq->waitq.lock) -+{ -+ /* TODO */ -+ spin_unlock(&fiq->waitq.lock); -+} -+ -+/* Return the number of scatter-gather list elements required */ -+static unsigned sg_count_fuse_req(struct fuse_req *req) -+{ -+ unsigned total_sgs = 1 /* fuse_in_header */; -+ -+ if (req->in.numargs - req->in.argpages) -+ total_sgs += 1; -+ -+ if (req->in.argpages) -+ total_sgs += req->num_pages; -+ -+ if (!test_bit(FR_ISREPLY, &req->flags)) -+ return total_sgs; -+ -+ total_sgs += 1 /* fuse_out_header */; -+ -+ if (req->out.numargs - req->out.argpages) -+ total_sgs += 1; -+ -+ if (req->out.argpages) -+ total_sgs += req->num_pages; -+ -+ return total_sgs; -+} -+ -+/* Add pages to scatter-gather list and return number of elements used */ -+static unsigned sg_init_fuse_pages(struct scatterlist *sg, -+ struct page **pages, -+ struct fuse_page_desc *page_descs, -+ unsigned num_pages) -+{ -+ unsigned i; -+ -+ for (i = 0; i < num_pages; i++) { -+ sg_init_table(&sg[i], 1); -+ sg_set_page(&sg[i], pages[i], -+ page_descs[i].length, -+ page_descs[i].offset); -+ } -+ -+ return i; -+} -+ -+/* Add args to scatter-gather list and return number of elements used */ -+static unsigned sg_init_fuse_args(struct scatterlist *sg, -+ struct fuse_req *req, -+ struct fuse_arg *args, -+ unsigned numargs, -+ bool argpages, -+ void *argbuf, -+ unsigned *len_used) -+{ -+ unsigned total_sgs = 0; -+ unsigned len; -+ -+ len = fuse_len_args(numargs - argpages, args); -+ if (len) -+ sg_init_one(&sg[total_sgs++], argbuf, len); -+ -+ if (argpages) -+ total_sgs += sg_init_fuse_pages(&sg[total_sgs], -+ req->pages, -+ req->page_descs, -+ req->num_pages); -+ -+ if (len_used) -+ *len_used = len; -+ -+ return total_sgs; -+} -+ -+/* Add a request to a virtqueue and kick the device */ -+static int virtio_fs_enqueue_req(struct virtqueue *vq, struct fuse_req *req) -+{ -+ struct scatterlist *stack_sgs[6 /* requests need at least 4 elements */]; -+ struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; -+ struct scatterlist **sgs = stack_sgs; -+ struct scatterlist *sg = stack_sg; -+ struct fuse_pqueue *fpq; -+ unsigned argbuf_used = 0; -+ unsigned out_sgs = 0; -+ unsigned in_sgs = 0; -+ unsigned total_sgs; -+ unsigned i; -+ int 
-+/* Add a request to a virtqueue and kick the device */
-+static int virtio_fs_enqueue_req(struct virtqueue *vq, struct fuse_req *req)
-+{
-+	struct scatterlist *stack_sgs[6 /* requests need at least 4 elements */];
-+	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
-+	struct scatterlist **sgs = stack_sgs;
-+	struct scatterlist *sg = stack_sg;
-+	struct fuse_pqueue *fpq;
-+	unsigned argbuf_used = 0;
-+	unsigned out_sgs = 0;
-+	unsigned in_sgs = 0;
-+	unsigned total_sgs;
-+	unsigned i;
-+	int ret;
-+	bool notify;
-+
-+	/* Does the sglist fit on the stack? */
-+	total_sgs = sg_count_fuse_req(req);
-+	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
-+		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
-+		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
-+		if (!sgs || !sg) {
-+			ret = -ENOMEM;
-+			goto out;
-+		}
-+	}
-+
-+	/* Use a bounce buffer since stack args cannot be mapped */
-+	ret = copy_args_to_argbuf(req);
-+	if (ret < 0)
-+		goto out;
-+
-+	/* Request elements */
-+	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
-+	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
-+				     (struct fuse_arg *)req->in.args,
-+				     req->in.numargs, req->in.argpages,
-+				     req->argbuf, &argbuf_used);
-+
-+	/* Reply elements */
-+	if (test_bit(FR_ISREPLY, &req->flags)) {
-+		sg_init_one(&sg[out_sgs + in_sgs++],
-+			    &req->out.h, sizeof(req->out.h));
-+		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
-+					    req->out.args, req->out.numargs,
-+					    req->out.argpages,
-+					    req->argbuf + argbuf_used, NULL);
-+	}
-+
-+	BUG_ON(out_sgs + in_sgs != total_sgs);
-+
-+	for (i = 0; i < total_sgs; i++)
-+		sgs[i] = &sg[i];
-+
-+	fpq = vq_to_fpq(vq);
-+	spin_lock(&fpq->lock);
-+
-+	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
-+	if (ret < 0) {
-+		/* TODO handle full virtqueue */
-+		spin_unlock(&fpq->lock);
-+		goto out;
-+	}
-+
-+	notify = virtqueue_kick_prepare(vq);
-+
-+	spin_unlock(&fpq->lock);
-+
-+	if (notify)
-+		virtqueue_notify(vq);
-+
-+out:
-+	if (ret < 0 && req->argbuf) {
-+		kfree(req->argbuf);
-+		req->argbuf = NULL;
-+	}
-+	if (sgs != stack_sgs) {
-+		kfree(sgs);
-+		kfree(sg);
-+	}
-+
-+	return ret;
-+}
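[Editor's note: for readers unfamiliar with the virtio ring API used above: virtqueue_add_sgs() takes an array of scatterlist pointers where the first out_sgs entries are device-readable and the next in_sgs entries are device-writable, and the opaque cookie is handed back by virtqueue_get_buf() on completion. A stripped-down sketch, with names that are illustrative only and not part of the patch:]

static int example_enqueue(struct virtqueue *vq, void *cmd, size_t cmd_len,
			   void *resp, size_t resp_len, void *cookie)
{
	struct scatterlist sg_cmd, sg_resp;
	struct scatterlist *sgs[2];

	sg_init_one(&sg_cmd, cmd, cmd_len);	/* device-readable */
	sg_init_one(&sg_resp, resp, resp_len);	/* device-writable */
	sgs[0] = &sg_cmd;
	sgs[1] = &sg_resp;

	/* 1 out sg followed by 1 in sg; cookie comes back via virtqueue_get_buf() */
	return virtqueue_add_sgs(vq, sgs, 1, 1, cookie, GFP_ATOMIC);
}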
-+static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
-+__releases(fiq->waitq.lock)
-+{
-+	unsigned queue_id = VQ_REQUEST; /* TODO multiqueue */
-+	struct virtio_fs *fs;
-+	struct fuse_conn *fc;
-+	struct fuse_req *req;
-+	struct fuse_pqueue *fpq;
-+	int ret;
-+
-+	BUG_ON(list_empty(&fiq->pending));
-+	req = list_last_entry(&fiq->pending, struct fuse_req, list);
-+	clear_bit(FR_PENDING, &req->flags);
-+	list_del_init(&req->list);
-+	BUG_ON(!list_empty(&fiq->pending));
-+	spin_unlock(&fiq->waitq.lock);
-+
-+	fs = fiq->priv;
-+	fc = fs->vqs[queue_id].fud->fc;
-+
-+	dev_dbg(&fs->vqs[queue_id].vq->vdev->dev,
-+		"%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
-+		__func__, req->in.h.opcode, req->in.h.unique, req->in.h.nodeid,
-+		req->in.h.len, fuse_len_args(req->out.numargs, req->out.args));
-+
-+	/* TODO put request onto fpq->io list? */
-+
-+	fpq = &fs->vqs[queue_id].fud->pq;
-+	spin_lock(&fpq->lock);
-+	if (!fpq->connected) {
-+		spin_unlock(&fpq->lock);
-+		req->out.h.error = -ENODEV;
-+		printk(KERN_ERR "%s: disconnected\n", __func__);
-+		fuse_request_end(fc, req);
-+		return;
-+	}
-+	list_add_tail(&req->list, &fpq->processing);
-+	spin_unlock(&fpq->lock);
-+	set_bit(FR_SENT, &req->flags);
-+	/* matches barrier in request_wait_answer() */
-+	smp_mb__after_atomic();
-+	/* TODO check for FR_INTERRUPTED? */
-+
-+	ret = virtio_fs_enqueue_req(fs->vqs[queue_id].vq, req);
-+	if (ret < 0) {
-+		req->out.h.error = ret;
-+		printk(KERN_ERR "%s: virtio_fs_enqueue_req failed %d\n",
-+		       __func__, ret);
-+		fuse_request_end(fc, req);
-+		return;
-+	}
-+}
-+
-+static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
-+	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
-+	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
-+	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
-+};
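[Editor's note: these callbacks replace the /dev/fuse wakeup paths. The fuse core queues a request under fiq->waitq.lock and then invokes the matching hook, which must release that lock before returning (hence the __releases annotations above). Roughly, and assuming the fiq->ops indirection this series introduces:]

/* Sketch of the caller side in the fuse core; illustrative only */
static void example_queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	spin_lock(&fiq->waitq.lock);
	req->in.h.unique = fuse_get_unique(fiq);
	list_add_tail(&req->list, &fiq->pending);
	/* The callback drops fiq->waitq.lock before returning */
	fiq->ops->wake_pending_and_unlock(fiq);
}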
-+static int virtio_fs_fill_super(struct super_block *sb, void *data,
-+				int silent)
-+{
-+	struct fuse_mount_data d;
-+	struct fuse_conn *fc;
-+	struct virtio_fs *fs;
-+	int is_bdev = sb->s_bdev != NULL;
-+	unsigned int i;
-+	int err;
-+	struct fuse_req *init_req;
-+
-+	err = -EINVAL;
-+	if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
-+		goto err;
-+	if (d.fd_present) {
-+		printk(KERN_ERR "virtio-fs: fd option cannot be used\n");
-+		goto err;
-+	}
-+	if (!d.tag_present) {
-+		printk(KERN_ERR "virtio-fs: missing tag option\n");
-+		goto err;
-+	}
-+
-+	fs = virtio_fs_find_instance(d.tag);
-+	if (!fs) {
-+		printk(KERN_ERR "virtio-fs: tag not found\n");
-+		err = -ENOENT;
-+		goto err;
-+	}
-+
-+	/* TODO lock */
-+	if (fs->vqs[VQ_REQUEST].fud) {
-+		printk(KERN_ERR "virtio-fs: device already in use\n");
-+		err = -EBUSY;
-+		goto err;
-+	}
-+
-+	err = -ENOMEM;
-+	/* Allocate fuse_dev for hiprio and notification queues */
-+	for (i = 0; i < VQ_REQUEST; i++) {
-+		struct virtio_fs_vq *fsvq = &fs->vqs[i];
-+
-+		fsvq->fud = fuse_dev_alloc();
-+		if (!fsvq->fud)
-+			goto err_free_fuse_devs;
-+	}
-+
-+	init_req = fuse_request_alloc(0);
-+	if (!init_req)
-+		goto err_free_fuse_devs;
-+	__set_bit(FR_BACKGROUND, &init_req->flags);
-+
-+	d.dax_dev = d.dax ? fs->dax_dev : NULL;
-+	d.fiq_ops = &virtio_fs_fiq_ops;
-+	d.fiq_priv = fs;
-+	d.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
-+	d.destroy = true; /* Send destroy request on unmount */
-+	err = fuse_fill_super_common(sb, &d);
-+	if (err < 0)
-+		goto err_free_init_req;
-+
-+	fc = fs->vqs[VQ_REQUEST].fud->fc;
-+
-+	/* TODO take fuse_mutex around this loop? */
-+	for (i = 0; i < fs->nvqs; i++) {
-+		struct virtio_fs_vq *fsvq = &fs->vqs[i];
-+
-+		if (i == VQ_REQUEST)
-+			continue; /* already initialized */
-+		fuse_dev_install(fsvq->fud, fc);
-+		atomic_inc(&fc->dev_count);
-+	}
-+
-+	fuse_send_init(fc, init_req);
-+	return 0;
-+
-+err_free_init_req:
-+	fuse_request_free(init_req);
-+err_free_fuse_devs:
-+	for (i = 0; i < fs->nvqs; i++) {
-+		struct virtio_fs_vq *fsvq = &fs->vqs[i];
-+
-+		if (fsvq->fud)
-+			fuse_dev_free(fsvq->fud);
-+	}
-+err:
-+	return err;
-+}
-+
-+static void virtio_kill_sb(struct super_block *sb)
-+{
-+	struct fuse_conn *fc = get_fuse_conn_super(sb);
-+
-+	fuse_kill_sb_anon(sb);
-+	if (fc) {
-+		struct virtio_fs *vfs = fc->iq.priv;
-+
-+		virtio_fs_free_devs(vfs);
-+	}
-+}
-+
-+static struct dentry *virtio_fs_mount(struct file_system_type *fs_type,
-+				      int flags, const char *dev_name,
-+				      void *raw_data)
-+{
-+	return mount_nodev(fs_type, flags, raw_data, virtio_fs_fill_super);
-+}
-+
-+static struct file_system_type virtio_fs_type = {
-+	.owner		= THIS_MODULE,
-+	.name		= KBUILD_MODNAME,
-+	.mount		= virtio_fs_mount,
-+	.kill_sb	= virtio_kill_sb,
-+};
-+
-+static int __init virtio_fs_init(void)
-+{
-+	int ret;
-+
-+	ret = register_virtio_driver(&virtio_fs_driver);
-+	if (ret < 0)
-+		return ret;
-+
-+	ret = register_filesystem(&virtio_fs_type);
-+	if (ret < 0) {
-+		unregister_virtio_driver(&virtio_fs_driver);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+module_init(virtio_fs_init);
-+
-+static void __exit virtio_fs_exit(void)
-+{
-+	unregister_filesystem(&virtio_fs_type);
-+	unregister_virtio_driver(&virtio_fs_driver);
-+}
-+module_exit(virtio_fs_exit);
-+
-+MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
-+MODULE_DESCRIPTION("Virtio Filesystem");
-+MODULE_LICENSE("GPL");
-+MODULE_ALIAS_FS(KBUILD_MODNAME);
-+MODULE_DEVICE_TABLE(virtio, id_table);
-diff --git a/fs/splice.c b/fs/splice.c
-index 485e409ef..c74f18098 100644
---- a/fs/splice.c
-+++ b/fs/splice.c
-@@ -365,7 +365,7 @@ static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
- 	return res;
- }
- 
--static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
-+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
- 				 struct pipe_inode_info *pipe, size_t len,
- 				 unsigned int flags)
- {
-@@ -429,6 +429,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
- 	iov_iter_advance(&to, copied);	/* truncates and discards */
- 	return res;
- }
-+EXPORT_SYMBOL(default_file_splice_read);
- 
- /*
-  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
-diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
-index b69786694..c97f8a0cb 100644
---- a/fs/xfs/xfs_aops.c
-+++ b/fs/xfs/xfs_aops.c
-@@ -953,7 +953,7 @@ xfs_dax_writepages(
- 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- 	return dax_writeback_mapping_range(mapping,
--			xfs_find_bdev_for_inode(mapping->host), wbc);
-+			xfs_find_bdev_for_inode(mapping->host), NULL, wbc);
- }
- 
- STATIC int
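[Editor's note: the XFS hunk above keeps passing the bdev and a NULL dax_dev; the point of the widened signature is that a filesystem with no block device can do the opposite. A sketch of the virtio-fs-style caller, assuming the fc->dax_dev field added elsewhere in this series:]

static int example_fuse_dax_writepages(struct address_space *mapping,
				       struct writeback_control *wbc)
{
	struct fuse_conn *fc = get_fuse_conn(mapping->host);

	/* No bdev to pass; hand over the dax_device directly */
	return dax_writeback_mapping_range(mapping, NULL, fc->dax_dev, wbc);
}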
-diff --git a/include/linux/dax.h b/include/linux/dax.h
-index 450b28db9..a8461841f 100644
---- a/include/linux/dax.h
-+++ b/include/linux/dax.h
-@@ -85,7 +85,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
- 
- struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
- int dax_writeback_mapping_range(struct address_space *mapping,
--		struct block_device *bdev, struct writeback_control *wbc);
-+		struct block_device *bdev, struct dax_device *dax_dev,
-+		struct writeback_control *wbc);
- 
- struct page *dax_layout_busy_page(struct address_space *mapping);
- bool dax_lock_mapping_entry(struct page *page);
-@@ -117,7 +118,8 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping)
- }
- 
- static inline int dax_writeback_mapping_range(struct address_space *mapping,
--		struct block_device *bdev, struct writeback_control *wbc)
-+		struct block_device *bdev, struct dax_device *dax_dev,
-+		struct writeback_control *wbc)
- {
- 	return -EOPNOTSUPP;
- }
-diff --git a/include/linux/fs.h b/include/linux/fs.h
-index d4e1b43a5..374122b5b 100644
---- a/include/linux/fs.h
-+++ b/include/linux/fs.h
-@@ -3000,6 +3000,8 @@ extern void block_sync_page(struct page *page);
- /* fs/splice.c */
- extern ssize_t generic_file_splice_read(struct file *, loff_t *,
- 		struct pipe_inode_info *, size_t, unsigned int);
-+extern ssize_t default_file_splice_read(struct file *, loff_t *,
-+		struct pipe_inode_info *, size_t, unsigned int);
- extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
- 		struct file *, loff_t *, size_t, unsigned int);
- extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
-diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
-index 32baf8e26..8f85d1d8a 100644
---- a/include/linux/virtio_config.h
-+++ b/include/linux/virtio_config.h
-@@ -10,6 +10,11 @@
- 
- struct irq_affinity;
- 
-+struct virtio_shm_region {
-+	u64 addr;
-+	u64 len;
-+};
-+
- /**
-  * virtio_config_ops - operations for configuring a virtio device
-  * @get: read the value of a configuration field
-@@ -60,6 +65,7 @@ struct irq_affinity;
-  *	the caller can then copy.
-  * @set_vq_affinity: set the affinity for a virtqueue.
-  * @get_vq_affinity: get the affinity for a virtqueue (optional).
-+ * @get_shm_region: get a shared memory region based on the index.
-  */
- typedef void vq_callback_t(struct virtqueue *);
- struct virtio_config_ops {
-@@ -83,6 +89,8 @@ struct virtio_config_ops {
- 			const struct cpumask *cpu_mask);
- 	const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev,
- 			int index);
-+	bool (*get_shm_region)(struct virtio_device *vdev,
-+			struct virtio_shm_region *region, u8 id);
- };
- 
- /* If driver didn't advertise the feature, it will never appear. */
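[Editor's note: get_shm_region is a transport hook; drivers are expected to go through the virtio_get_shm_region() wrapper added in the next hunk. A usage sketch, locating the DAX cache region of a virtio-fs device (VIRTIO_FS_SHMCAP_ID_CACHE is defined later in this patch; error handling elided):]

static int example_find_cache_region(struct virtio_device *vdev)
{
	struct virtio_shm_region cache_reg;

	if (!virtio_get_shm_region(vdev, &cache_reg, VIRTIO_FS_SHMCAP_ID_CACHE))
		return -ENXIO;	/* transport exposes no such region */

	dev_dbg(&vdev->dev, "shm region: addr 0x%llx len 0x%llx\n",
		cache_reg.addr, cache_reg.len);
	return 0;
}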
-@@ -245,6 +253,15 @@ int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
- 	return 0;
- }
- 
-+static inline
-+bool virtio_get_shm_region(struct virtio_device *vdev,
-+			   struct virtio_shm_region *region, u8 id)
-+{
-+	if (!vdev->config->get_shm_region)
-+		return false;
-+	return vdev->config->get_shm_region(vdev, region, id);
-+}
-+
- static inline bool virtio_is_little_endian(struct virtio_device *vdev)
- {
- 	return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
-diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
-index 2170e58a2..2f35b3791 100644
---- a/include/uapi/linux/fuse.h
-+++ b/include/uapi/linux/fuse.h
-@@ -383,6 +383,8 @@ enum fuse_opcode {
- 	FUSE_READDIRPLUS	= 44,
- 	FUSE_RENAME2		= 45,
- 	FUSE_LSEEK		= 46,
-+	FUSE_SETUPMAPPING	= 48,
-+	FUSE_REMOVEMAPPING	= 49,
- 
- 	/* CUSE specific operations */
- 	CUSE_INIT		= 4096,
-@@ -794,4 +796,36 @@ struct fuse_lseek_out {
- 	uint64_t	offset;
- };
- 
-+#define FUSE_SETUPMAPPING_ENTRIES 8
-+#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
-+#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
-+struct fuse_setupmapping_in {
-+	/* An already open handle */
-+	uint64_t	fh;
-+	/* Offset into the file to start the mapping */
-+	uint64_t	foffset;
-+	/* Length of mapping required */
-+	uint64_t	len;
-+	/* Flags, FUSE_SETUPMAPPING_FLAG_* */
-+	uint64_t	flags;
-+	/* Offset in Memory Window */
-+	uint64_t	moffset;
-+};
-+
-+struct fuse_setupmapping_out {
-+	/* Offsets into the cache of mappings */
-+	uint64_t	coffset[FUSE_SETUPMAPPING_ENTRIES];
-+	/* Lengths of each mapping */
-+	uint64_t	len[FUSE_SETUPMAPPING_ENTRIES];
-+};
-+
-+struct fuse_removemapping_in {
-+	/* An already open handle */
-+	uint64_t	fh;
-+	/* Offset into the dax window to start the unmapping */
-+	uint64_t	moffset;
-+	/* Length of mapping to remove */
-+	uint64_t	len;
-+};
-+
- #endif /* _LINUX_FUSE_H */
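[Editor's note: to make the new message concrete, a minimal sketch of a client filling fuse_setupmapping_in to map the first 2 MiB of an open file read/write at the start of the DAX window. fh would come from an earlier FUSE_OPEN; the helper name is illustrative only:]

static void example_fill_setupmapping(struct fuse_setupmapping_in *in,
				      uint64_t fh)
{
	*in = (struct fuse_setupmapping_in){
		.fh	 = fh,
		.foffset = 0,			/* file offset */
		.len	 = 2 * 1024 * 1024,	/* 2 MiB mapping */
		.flags	 = FUSE_SETUPMAPPING_FLAG_READ |
			   FUSE_SETUPMAPPING_FLAG_WRITE,
		.moffset = 0,			/* DAX window offset */
	};
}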
-diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
-new file mode 100644
-index 000000000..d4bb54956
---- /dev/null
-+++ b/include/uapi/linux/virtio_fs.h
-@@ -0,0 +1,44 @@
-+#ifndef _UAPI_LINUX_VIRTIO_FS_H
-+#define _UAPI_LINUX_VIRTIO_FS_H
-+/* This header is BSD licensed so anyone can use the definitions to implement
-+ * compatible drivers/servers.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ *    notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ *    notice, this list of conditions and the following disclaimer in the
-+ *    documentation and/or other materials provided with the distribution.
-+ * 3. Neither the name of IBM nor the names of its contributors
-+ *    may be used to endorse or promote products derived from this software
-+ *    without specific prior written permission.
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE. */
-+#include <linux/types.h>
-+#include <linux/virtio_ids.h>
-+#include <linux/virtio_config.h>
-+#include <linux/virtio_types.h>
-+
-+struct virtio_fs_config {
-+	/* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */
-+	__u8 tag[36];
-+
-+	/* Number of request queues */
-+	__u32 num_queues;
-+} __attribute__((packed));
-+
-+/* For the id field in virtio_pci_shm_cap */
-+#define VIRTIO_FS_SHMCAP_ID_CACHE 0
-+
-+#endif /* _UAPI_LINUX_VIRTIO_FS_H */
-diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
-index 6d5c3b2d4..884b0e273 100644
---- a/include/uapi/linux/virtio_ids.h
-+++ b/include/uapi/linux/virtio_ids.h
-@@ -43,5 +43,6 @@
- #define VIRTIO_ID_INPUT        18 /* virtio input */
- #define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
- #define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
-+#define VIRTIO_ID_FS           26 /* virtio filesystem */
- 
- #endif /* _LINUX_VIRTIO_IDS_H */
-diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h
-index c4b09689a..0650f91be 100644
---- a/include/uapi/linux/virtio_mmio.h
-+++ b/include/uapi/linux/virtio_mmio.h
-@@ -122,6 +122,17 @@
- #define VIRTIO_MMIO_QUEUE_USED_LOW	0x0a0
- #define VIRTIO_MMIO_QUEUE_USED_HIGH	0x0a4
- 
-+/* Shared memory region id */
-+#define VIRTIO_MMIO_SHM_SEL		0x0ac
-+
-+/* Shared memory region length, 64 bits in two halves */
-+#define VIRTIO_MMIO_SHM_LEN_LOW		0x0b0
-+#define VIRTIO_MMIO_SHM_LEN_HIGH	0x0b4
-+
-+/* Shared memory region base address, 64 bits in two halves */
-+#define VIRTIO_MMIO_SHM_BASE_LOW	0x0b8
-+#define VIRTIO_MMIO_SHM_BASE_HIGH	0x0bc
-+
- /* Configuration atomicity value */
- #define VIRTIO_MMIO_CONFIG_GENERATION	0x0fc
- 
-diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
-index 90007a1ab..31841a60a 100644
---- a/include/uapi/linux/virtio_pci.h
-+++ b/include/uapi/linux/virtio_pci.h
-@@ -113,6 +113,8 @@
- #define VIRTIO_PCI_CAP_DEVICE_CFG	4
- /* PCI configuration access */
- #define VIRTIO_PCI_CAP_PCI_CFG		5
-+/* Additional shared memory capability */
-+#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
- 
- /* This is the PCI capability header: */
- struct virtio_pci_cap {
-@@ -163,6 +165,14 @@ struct virtio_pci_cfg_cap {
- 	__u8 pci_cfg_data[4]; /* Data for BAR access. */
- };
- 
-+/* Fields in VIRTIO_PCI_CAP_SHARED_MEMORY_CFG */
-+struct virtio_pci_shm_cap {
-+	struct virtio_pci_cap cap;
-+	__le32 offset_hi;	/* Most sig 32 bits of offset */
-+	__le32 length_hi;	/* Most sig 32 bits of length */
-+	__u8 id;		/* To distinguish shm chunks */
-+};
-+
- /* Macro versions of offsets for the Old Timers! */
- #define VIRTIO_PCI_CAP_VNDR		0
- #define VIRTIO_PCI_CAP_NEXT		1
--- 
-2.19.2
-
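[Editor's note: the 64-bit region offset and length are split between the generic virtio_pci_cap fields and the _hi halves above, so a PCI transport's get_shm_region() would reassemble them roughly like this (sketch; capability search and BAR mapping elided, helper name illustrative only):]

static void example_read_shm_cap(const struct virtio_pci_shm_cap *shm,
				 u64 *offset, u64 *len)
{
	/* Low halves live in the embedded virtio_pci_cap */
	*offset = ((u64)le32_to_cpu(shm->offset_hi) << 32) |
		  le32_to_cpu(shm->cap.offset);
	*len	= ((u64)le32_to_cpu(shm->length_hi) << 32) |
		  le32_to_cpu(shm->cap.length);
}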