diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2020-08-19 18:19:46 -0400 |
---|---|---|
committer | Miklos Szeredi <mszeredi@redhat.com> | 2020-09-10 11:39:22 +0200 |
commit | 22f3787e9d95e72d1f09795f294fb010e2998f43 (patch) | |
tree | 3480623aa1da15cb0839b126d8fab736643b9d2b /fs/fuse | |
parent | f4fd4ae354ba23c948afb0ee3386182acb96d481 (diff) | |
download | lwn-22f3787e9d95e72d1f09795f294fb010e2998f43.tar.gz lwn-22f3787e9d95e72d1f09795f294fb010e2998f43.zip |
virtiofs: set up virtio_fs dax_device
Setup a dax device.
Use the shm capability to find the cache entry and map it.
The DAX window is accessed by the fs/dax.c infrastructure and must have
struct pages (at least on x86). Use devm_memremap_pages() to map the
DAX window PCI BAR and allocate struct page.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Diffstat (limited to 'fs/fuse')
-rw-r--r-- | fs/fuse/virtio_fs.c | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 47ecdc15f25d..f31a59f74475 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -5,12 +5,16 @@ */ #include <linux/fs.h> +#include <linux/dax.h> +#include <linux/pci.h> +#include <linux/pfn_t.h> #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_fs.h> #include <linux/delay.h> #include <linux/fs_context.h> #include <linux/highmem.h> +#include <linux/uio.h> #include "fuse_i.h" /* List of virtio-fs device instances and a lock for the list. Also provides @@ -49,6 +53,12 @@ struct virtio_fs { struct virtio_fs_vq *vqs; unsigned int nvqs; /* number of virtqueues */ unsigned int num_request_queues; /* number of request queues */ + struct dax_device *dax_dev; + + /* DAX memory window where file contents are mapped */ + void *window_kaddr; + phys_addr_t window_phys_addr; + size_t window_len; }; struct virtio_fs_forget_req { @@ -686,6 +696,130 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, vdev->config->del_vqs(vdev); } +/* Map a window offset to a page frame number. The window offset will have + * been produced by .iomap_begin(), which maps a file offset to a window + * offset. + */ +static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) +{ + struct virtio_fs *fs = dax_get_private(dax_dev); + phys_addr_t offset = PFN_PHYS(pgoff); + size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; + + if (kaddr) + *kaddr = fs->window_kaddr + offset; + if (pfn) + *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, + PFN_DEV | PFN_MAP); + return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; +} + +static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + return copy_from_iter(addr, bytes, i); +} + +static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} + +static int virtio_fs_zero_page_range(struct dax_device *dax_dev, + pgoff_t pgoff, size_t nr_pages) +{ + long rc; + void *kaddr; + + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + if (rc < 0) + return rc; + memset(kaddr, 0, nr_pages << PAGE_SHIFT); + dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); + return 0; +} + +static const struct dax_operations virtio_fs_dax_ops = { + .direct_access = virtio_fs_direct_access, + .copy_from_iter = virtio_fs_copy_from_iter, + .copy_to_iter = virtio_fs_copy_to_iter, + .zero_page_range = virtio_fs_zero_page_range, +}; + +static void virtio_fs_cleanup_dax(void *data) +{ + struct dax_device *dax_dev = data; + + kill_dax(dax_dev); + put_dax(dax_dev); +} + +static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) +{ + struct virtio_shm_region cache_reg; + struct dev_pagemap *pgmap; + bool have_cache; + + if (!IS_ENABLED(CONFIG_FUSE_DAX)) + return 0; + + /* Get cache region */ + have_cache = virtio_get_shm_region(vdev, &cache_reg, + (u8)VIRTIO_FS_SHMCAP_ID_CACHE); + if (!have_cache) { + dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); + return 0; + } + + if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, + dev_name(&vdev->dev))) { + dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", + cache_reg.addr, cache_reg.len); + return -EBUSY; + } + + dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, + cache_reg.addr); + + pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); + if (!pgmap) + return -ENOMEM; + + pgmap->type = MEMORY_DEVICE_FS_DAX; + + /* Ideally we would directly use the PCI BAR resource but + * devm_memremap_pages() wants its own copy in pgmap. So + * initialize a struct resource from scratch (only the start + * and end fields will be used). + */ + pgmap->res = (struct resource){ + .name = "virtio-fs dax window", + .start = (phys_addr_t) cache_reg.addr, + .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, + }; + + fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); + if (IS_ERR(fs->window_kaddr)) + return PTR_ERR(fs->window_kaddr); + + fs->window_phys_addr = (phys_addr_t) cache_reg.addr; + fs->window_len = (phys_addr_t) cache_reg.len; + + dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", + __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); + + fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); + if (IS_ERR(fs->dax_dev)) + return PTR_ERR(fs->dax_dev); + + return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, + fs->dax_dev); +} + static int virtio_fs_probe(struct virtio_device *vdev) { struct virtio_fs *fs; @@ -707,6 +841,10 @@ static int virtio_fs_probe(struct virtio_device *vdev) /* TODO vq affinity */ + ret = virtio_fs_setup_dax(vdev, fs); + if (ret < 0) + goto out_vqs; + /* Bring the device online in case the filesystem is mounted and * requests need to be sent before we return. */ |