From afd586f0d06ce3d81b7c474499630fec88833828 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:37 +0100 Subject: dax: remove CONFIG_DAX_DRIVER CONFIG_DAX_DRIVER only selects CONFIG_DAX now, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20211129102203.2243509-4-hch@lst.de Signed-off-by: Dan Williams --- fs/fuse/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 40ce9a1c12e5..038ed0b9aaa5 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -45,7 +45,7 @@ config FUSE_DAX select INTERVAL_TREE depends on VIRTIO_FS depends on FS_DAX - depends on DAX_DRIVER + depends on DAX help This allows bypassing guest page cache and allows mapping host page cache directly in guest address space. -- cgit v1.2.3 From fb08a1908cb119a4585611d91461ab6d27756b14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:38 +0100 Subject: dax: simplify the dax_device <-> gendisk association Replace the dax_host_hash with an xarray indexed by the pointer value of the gendisk, and require explicitly calls from the block drivers that want to associate their gendisk with a dax_device. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-5-hch@lst.de Signed-off-by: Dan Williams --- drivers/dax/bus.c | 6 +-- drivers/dax/super.c | 109 +++++++++++-------------------------------- drivers/md/dm.c | 6 ++- drivers/nvdimm/pmem.c | 10 +++- drivers/s390/block/dcssblk.c | 11 +++-- fs/fuse/virtio_fs.c | 2 +- include/linux/dax.h | 19 ++++++-- 7 files changed, 66 insertions(+), 97 deletions(-) (limited to 'fs/fuse') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 452cf7860926..6683d42c32c5 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1321,10 +1321,10 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) } /* - * No 'host' or dax_operations since there is no access to this - * device outside of mmap of the resulting character device. + * No dax_operations since there is no access to this device outside of + * mmap of the resulting character device. */ - dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); + dax_dev = alloc_dax(dev_dax, NULL, DAXDEV_F_SYNC); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto err_alloc_dax; diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e20d0cef10a1..bf77c3da5d56 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -7,10 +7,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -21,15 +19,12 @@ * struct dax_device - anchor object for dax services * @inode: core vfs * @cdev: optional character interface for "device dax" - * @host: optional name for lookups where the device path is not available * @private: dax driver private data * @flags: state and boolean properties */ struct dax_device { - struct hlist_node list; struct inode inode; struct cdev cdev; - const char *host; void *private; unsigned long flags; const struct dax_operations *ops; @@ -42,10 +37,6 @@ static DEFINE_IDA(dax_minor_ida); static struct kmem_cache *dax_cache __read_mostly; static struct super_block *dax_superblock __read_mostly; -#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head)) -static struct hlist_head dax_host_list[DAX_HASH_SIZE]; -static DEFINE_SPINLOCK(dax_host_lock); - int dax_read_lock(void) { return srcu_read_lock(&dax_srcu); @@ -58,13 +49,22 @@ void dax_read_unlock(int id) } EXPORT_SYMBOL_GPL(dax_read_unlock); -static int dax_host_hash(const char *host) +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) +#include + +static DEFINE_XARRAY(dax_hosts); + +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { - return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; + return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL); } +EXPORT_SYMBOL_GPL(dax_add_host); -#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) -#include +void dax_remove_host(struct gendisk *disk) +{ + xa_erase(&dax_hosts, (unsigned long)disk); +} +EXPORT_SYMBOL_GPL(dax_remove_host); int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, pgoff_t *pgoff) @@ -81,41 +81,24 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, EXPORT_SYMBOL(bdev_dax_pgoff); /** - * dax_get_by_host() - temporary lookup mechanism for filesystem-dax - * @host: alternate name for the device registered by a dax driver + * fs_dax_get_by_bdev() - temporary lookup mechanism for filesystem-dax + * @bdev: block device to find a dax_device for */ -static struct dax_device *dax_get_by_host(const char *host) +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { - struct dax_device *dax_dev, *found = NULL; - int hash, id; + struct dax_device *dax_dev; + int id; - if (!host) + if (!blk_queue_dax(bdev->bd_disk->queue)) return NULL; - hash = dax_host_hash(host); - id = dax_read_lock(); - spin_lock(&dax_host_lock); - hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { - if (!dax_alive(dax_dev) - || strcmp(host, dax_dev->host) != 0) - continue; - - if (igrab(&dax_dev->inode)) - found = dax_dev; - break; - } - spin_unlock(&dax_host_lock); + dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk); + if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode)) + dax_dev = NULL; dax_read_unlock(id); - return found; -} - -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) -{ - if (!blk_queue_dax(bdev->bd_disk->queue)) - return NULL; - return dax_get_by_host(bdev->bd_disk->disk_name); + return dax_dev; } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); @@ -361,12 +344,7 @@ void kill_dax(struct dax_device *dax_dev) return; clear_bit(DAXDEV_ALIVE, &dax_dev->flags); - synchronize_srcu(&dax_srcu); - - spin_lock(&dax_host_lock); - hlist_del_init(&dax_dev->list); - spin_unlock(&dax_host_lock); } EXPORT_SYMBOL_GPL(kill_dax); @@ -398,8 +376,6 @@ static struct dax_device *to_dax_dev(struct inode *inode) static void dax_free_inode(struct inode *inode) { struct dax_device *dax_dev = to_dax_dev(inode); - kfree(dax_dev->host); - dax_dev->host = NULL; if (inode->i_rdev) ida_simple_remove(&dax_minor_ida, iminor(inode)); kmem_cache_free(dax_cache, dax_dev); @@ -474,54 +450,25 @@ static struct dax_device *dax_dev_get(dev_t devt) return dax_dev; } -static void dax_add_host(struct dax_device *dax_dev, const char *host) -{ - int hash; - - /* - * Unconditionally init dax_dev since it's coming from a - * non-zeroed slab cache - */ - INIT_HLIST_NODE(&dax_dev->list); - dax_dev->host = host; - if (!host) - return; - - hash = dax_host_hash(host); - spin_lock(&dax_host_lock); - hlist_add_head(&dax_dev->list, &dax_host_list[hash]); - spin_unlock(&dax_host_lock); -} - -struct dax_device *alloc_dax(void *private, const char *__host, - const struct dax_operations *ops, unsigned long flags) +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, + unsigned long flags) { struct dax_device *dax_dev; - const char *host; dev_t devt; int minor; - if (ops && !ops->zero_page_range) { - pr_debug("%s: error: device does not provide dax" - " operation zero_page_range()\n", - __host ? __host : "Unknown"); + if (WARN_ON_ONCE(ops && !ops->zero_page_range)) return ERR_PTR(-EINVAL); - } - - host = kstrdup(__host, GFP_KERNEL); - if (__host && !host) - return ERR_PTR(-ENOMEM); minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL); if (minor < 0) - goto err_minor; + return ERR_PTR(-ENOMEM); devt = MKDEV(MAJOR(dax_devt), minor); dax_dev = dax_dev_get(devt); if (!dax_dev) goto err_dev; - dax_add_host(dax_dev, host); dax_dev->ops = ops; dax_dev->private = private; if (flags & DAXDEV_F_SYNC) @@ -531,8 +478,6 @@ struct dax_device *alloc_dax(void *private, const char *__host, err_dev: ida_simple_remove(&dax_minor_ida, minor); - err_minor: - kfree(host); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(alloc_dax); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b93fcc91176e..a8c650276b32 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1683,6 +1683,7 @@ static void cleanup_mapped_device(struct mapped_device *md) bioset_exit(&md->io_bs); if (md->dax_dev) { + dax_remove_host(md->disk); kill_dax(md->dax_dev); put_dax(md->dax_dev); md->dax_dev = NULL; @@ -1784,12 +1785,13 @@ static struct mapped_device *alloc_dev(int minor) sprintf(md->disk->disk_name, "dm-%d", minor); if (IS_ENABLED(CONFIG_FS_DAX)) { - md->dax_dev = alloc_dax(md, md->disk->disk_name, - &dm_dax_ops, 0); + md->dax_dev = alloc_dax(md, &dm_dax_ops, 0); if (IS_ERR(md->dax_dev)) { md->dax_dev = NULL; goto bad; } + if (dax_add_host(md->dax_dev, md->disk)) + goto bad; } format_dev_t(md->name, MKDEV(_major, minor)); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index fe7ece1534e1..1018f0d44acb 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -379,6 +379,7 @@ static void pmem_release_disk(void *__pmem) { struct pmem_device *pmem = __pmem; + dax_remove_host(pmem->disk); kill_dax(pmem->dax_dev); put_dax(pmem->dax_dev); del_gendisk(pmem->disk); @@ -497,17 +498,20 @@ static int pmem_attach_disk(struct device *dev, if (is_nvdimm_sync(nd_region)) flags = DAXDEV_F_SYNC; - dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); + dax_dev = alloc_dax(pmem, &pmem_dax_ops, flags); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto out; } + rc = dax_add_host(dax_dev, disk); + if (rc) + goto out_cleanup_dax; dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); pmem->dax_dev = dax_dev; rc = device_add_disk(dev, disk, pmem_attribute_groups); if (rc) - goto out_cleanup_dax; + goto out_remove_host; if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; @@ -519,6 +523,8 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "'badblocks' notification disabled\n"); return 0; +out_remove_host: + dax_remove_host(pmem->disk); out_cleanup_dax: kill_dax(pmem->dax_dev); put_dax(pmem->dax_dev); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 27ab888b44d0..657e492f2bc2 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -687,18 +687,21 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char if (rc) goto put_dev; - dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name, - &dcssblk_dax_ops, DAXDEV_F_SYNC); + dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops, + DAXDEV_F_SYNC); if (IS_ERR(dev_info->dax_dev)) { rc = PTR_ERR(dev_info->dax_dev); dev_info->dax_dev = NULL; goto put_dev; } + rc = dax_add_host(dev_info->dax_dev, dev_info->gd); + if (rc) + goto out_dax; get_device(&dev_info->dev); rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL); if (rc) - goto out_dax; + goto out_dax_host; switch (dev_info->segment_type) { case SEG_TYPE_SR: @@ -714,6 +717,8 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char rc = count; goto out; +out_dax_host: + dax_remove_host(dev_info->gd); out_dax: put_device(&dev_info->dev); kill_dax(dev_info->dax_dev); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 4cfa4bc1f579..242cc1c0d7ed 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -850,7 +850,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); - fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); + fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops, 0); if (IS_ERR(fs->dax_dev)) return PTR_ERR(fs->dax_dev); diff --git a/include/linux/dax.h b/include/linux/dax.h index 8623caa67388..e2e9a67004cb 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -11,9 +11,11 @@ typedef unsigned long dax_entry_t; +struct dax_device; +struct gendisk; struct iomap_ops; struct iomap; -struct dax_device; + struct dax_operations { /* * direct_access: translate a device-relative @@ -39,8 +41,8 @@ struct dax_operations { }; #if IS_ENABLED(CONFIG_DAX) -struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops, unsigned long flags); +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, + unsigned long flags); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); @@ -68,7 +70,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return dax_synchronous(dax_dev); } #else -static inline struct dax_device *alloc_dax(void *private, const char *host, +static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, unsigned long flags) { /* @@ -107,6 +109,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); +void dax_remove_host(struct gendisk *disk); bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors); @@ -128,6 +132,13 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); #else +static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) +{ + return 0; +} +static inline void dax_remove_host(struct gendisk *disk) +{ +} #define generic_fsdax_supported NULL static inline bool dax_supported(struct dax_device *dax_dev, -- cgit v1.2.3 From 30c6828a17a572aeb9e3a3bacce05fdcf1106541 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 09:45:07 +0100 Subject: dax: remove the DAXDEV_F_SYNC flag Remove the DAXDEV_F_SYNC flag and thus the flags argument to alloc_dax and just let the drivers call set_dax_synchronous directly. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Gupta Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20211215084508.435401-4-hch@lst.de Signed-off-by: Dan Williams --- drivers/dax/bus.c | 3 ++- drivers/dax/super.c | 6 +----- drivers/md/dm.c | 2 +- drivers/nvdimm/pmem.c | 7 +++---- drivers/s390/block/dcssblk.c | 4 ++-- fs/fuse/virtio_fs.c | 2 +- include/linux/dax.h | 8 ++------ 7 files changed, 12 insertions(+), 20 deletions(-) (limited to 'fs/fuse') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 6683d42c32c5..da2a14d096d2 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1324,11 +1324,12 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) * No dax_operations since there is no access to this device outside of * mmap of the resulting character device. */ - dax_dev = alloc_dax(dev_dax, NULL, DAXDEV_F_SYNC); + dax_dev = alloc_dax(dev_dax, NULL); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto err_alloc_dax; } + set_dax_synchronous(dax_dev); /* a device_dax instance is dead while the driver is not attached */ kill_dax(dax_dev); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e18155f43a63..e81d5ee57390 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -345,8 +345,7 @@ static struct dax_device *dax_dev_get(dev_t devt) return dax_dev; } -struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, - unsigned long flags) +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { struct dax_device *dax_dev; dev_t devt; @@ -366,9 +365,6 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, dax_dev->ops = ops; dax_dev->private = private; - if (flags & DAXDEV_F_SYNC) - set_dax_synchronous(dax_dev); - return dax_dev; err_dev: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e997c02bb0a..f4b972af1092 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1765,7 +1765,7 @@ static struct mapped_device *alloc_dev(int minor) sprintf(md->disk->disk_name, "dm-%d", minor); if (IS_ENABLED(CONFIG_FS_DAX)) { - md->dax_dev = alloc_dax(md, &dm_dax_ops, 0); + md->dax_dev = alloc_dax(md, &dm_dax_ops); if (IS_ERR(md->dax_dev)) { md->dax_dev = NULL; goto bad; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d225bcfa67cf..18b1d9c55831 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -400,7 +400,6 @@ static int pmem_attach_disk(struct device *dev, struct gendisk *disk; void *addr; int rc; - unsigned long flags = 0UL; pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) @@ -493,13 +492,13 @@ static int pmem_attach_disk(struct device *dev, nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range); disk->bb = &pmem->bb; - if (is_nvdimm_sync(nd_region)) - flags = DAXDEV_F_SYNC; - dax_dev = alloc_dax(pmem, &pmem_dax_ops, flags); + dax_dev = alloc_dax(pmem, &pmem_dax_ops); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto out; } + if (is_nvdimm_sync(nd_region)) + set_dax_synchronous(dax_dev); rc = dax_add_host(dax_dev, disk); if (rc) goto out_cleanup_dax; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index e65e83764d1c..10823debc09b 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -686,13 +686,13 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char if (rc) goto put_dev; - dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops, - DAXDEV_F_SYNC); + dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops); if (IS_ERR(dev_info->dax_dev)) { rc = PTR_ERR(dev_info->dax_dev); dev_info->dax_dev = NULL; goto put_dev; } + set_dax_synchronous(dev_info->dax_dev); rc = dax_add_host(dev_info->dax_dev, dev_info->gd); if (rc) goto out_dax; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 242cc1c0d7ed..5c03a0364a9b 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -850,7 +850,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); - fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops, 0); + fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); if (IS_ERR(fs->dax_dev)) return PTR_ERR(fs->dax_dev); diff --git a/include/linux/dax.h b/include/linux/dax.h index 3bd1fdb5d5f4..c04f46478e3b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -6,9 +6,6 @@ #include #include -/* Flag for synchronous flush */ -#define DAXDEV_F_SYNC (1UL << 0) - typedef unsigned long dax_entry_t; struct dax_device; @@ -42,8 +39,7 @@ struct dax_operations { }; #if IS_ENABLED(CONFIG_DAX) -struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, - unsigned long flags); +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); @@ -64,7 +60,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, } #else static inline struct dax_device *alloc_dax(void *private, - const struct dax_operations *ops, unsigned long flags) + const struct dax_operations *ops) { /* * Callers should check IS_ENABLED(CONFIG_DAX) to know if this -- cgit v1.2.3 From 7ac5360cd4d02cc7e0eaf10867f599e041822f12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 09:45:08 +0100 Subject: dax: remove the copy_from_iter and copy_to_iter methods These methods indirect the actual DAX read/write path. In the end pmem uses magic flush and mc safe variants and fuse and dcssblk use plain ones while device mapper picks redirects to the underlying device. Add set_dax_nocache() and set_dax_nomc() APIs to control which copy routines are used to remove indirect call from the read/write fast path as well as a lot of boilerplate code. Signed-off-by: Christoph Hellwig Reviewed-by: Vivek Goyal [virtiofs] Link: https://lore.kernel.org/r/20211215084508.435401-5-hch@lst.de Signed-off-by: Dan Williams --- drivers/dax/bus.c | 2 ++ drivers/dax/super.c | 36 ++++++++++++++++--- drivers/md/dm-linear.c | 20 ----------- drivers/md/dm-log-writes.c | 80 ------------------------------------------- drivers/md/dm-stripe.c | 20 ----------- drivers/md/dm.c | 52 ++-------------------------- drivers/nvdimm/pmem.c | 20 ++--------- drivers/s390/block/dcssblk.c | 14 -------- fs/dax.c | 5 --- fs/fuse/virtio_fs.c | 16 --------- include/linux/dax.h | 9 ++--- include/linux/device-mapper.h | 4 --- 12 files changed, 41 insertions(+), 237 deletions(-) (limited to 'fs/fuse') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index da2a14d096d2..ee4568ef757c 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1330,6 +1330,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) goto err_alloc_dax; } set_dax_synchronous(dax_dev); + set_dax_nocache(dax_dev); + set_dax_nomc(dax_dev); /* a device_dax instance is dead while the driver is not attached */ kill_dax(dax_dev); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e81d5ee57390..e3029389d809 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -105,6 +105,10 @@ enum dax_device_flags { DAXDEV_WRITE_CACHE, /* flag to check if device supports synchronous flush */ DAXDEV_SYNC, + /* do not leave the caches dirty after writes */ + DAXDEV_NOCACHE, + /* handle CPU fetch exceptions during reads */ + DAXDEV_NOMC, }; /** @@ -146,9 +150,15 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, if (!dax_alive(dax_dev)) return 0; - return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); + /* + * The userspace address for the memory copy has already been validated + * via access_ok() in vfs_write, so use the 'no check' version to bypass + * the HARDENED_USERCOPY overhead. + */ + if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags)) + return _copy_from_iter_flushcache(addr, bytes, i); + return _copy_from_iter(addr, bytes, i); } -EXPORT_SYMBOL_GPL(dax_copy_from_iter); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) @@ -156,9 +166,15 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, if (!dax_alive(dax_dev)) return 0; - return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); + /* + * The userspace address for the memory copy has already been validated + * via access_ok() in vfs_red, so use the 'no check' version to bypass + * the HARDENED_USERCOPY overhead. + */ + if (test_bit(DAXDEV_NOMC, &dax_dev->flags)) + return _copy_mc_to_iter(addr, bytes, i); + return _copy_to_iter(addr, bytes, i); } -EXPORT_SYMBOL_GPL(dax_copy_to_iter); int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages) @@ -220,6 +236,18 @@ void set_dax_synchronous(struct dax_device *dax_dev) } EXPORT_SYMBOL_GPL(set_dax_synchronous); +void set_dax_nocache(struct dax_device *dax_dev) +{ + set_bit(DAXDEV_NOCACHE, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(set_dax_nocache); + +void set_dax_nomc(struct dax_device *dax_dev) +{ + set_bit(DAXDEV_NOMC, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(set_dax_nomc); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 90de42f6743a..1b97a11d7151 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -180,22 +180,6 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } -static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); -} - -static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); -} - static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { @@ -206,8 +190,6 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, #else #define linear_dax_direct_access NULL -#define linear_dax_copy_from_iter NULL -#define linear_dax_copy_to_iter NULL #define linear_dax_zero_page_range NULL #endif @@ -225,8 +207,6 @@ static struct target_type linear_target = { .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, - .dax_copy_from_iter = linear_dax_copy_from_iter, - .dax_copy_to_iter = linear_dax_copy_to_iter, .dax_zero_page_range = linear_dax_zero_page_range, }; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index cdb22e7a1d0d..139b09b06eda 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -902,51 +902,6 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit } #if IS_ENABLED(CONFIG_FS_DAX) -static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, - struct iov_iter *i) -{ - struct pending_block *block; - - if (!bytes) - return 0; - - block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); - if (!block) { - DMERR("Error allocating dax pending block"); - return -ENOMEM; - } - - block->data = kzalloc(bytes, GFP_KERNEL); - if (!block->data) { - DMERR("Error allocating dax data space"); - kfree(block); - return -ENOMEM; - } - - /* write data provided via the iterator */ - if (!copy_from_iter(block->data, bytes, i)) { - DMERR("Error copying dax data"); - kfree(block->data); - kfree(block); - return -EIO; - } - - /* rewind the iterator so that the block driver can use it */ - iov_iter_revert(i, bytes); - - block->datalen = bytes; - block->sector = bio_to_dev_sectors(lc, sector); - block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift; - - atomic_inc(&lc->pending_blocks); - spin_lock_irq(&lc->blocks_lock); - list_add_tail(&block->list, &lc->unflushed_blocks); - spin_unlock_irq(&lc->blocks_lock); - wake_up_process(lc->log_kthread); - - return 0; -} - static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) { @@ -964,37 +919,6 @@ static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } -static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, - pgoff_t pgoff, void *addr, size_t bytes, - struct iov_iter *i) -{ - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; - struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - int err; - - /* Don't bother doing anything if logging has been disabled */ - if (!lc->logging_enabled) - goto dax_copy; - - err = log_dax(lc, sector, bytes, i); - if (err) { - DMWARN("Error %d logging DAX write", err); - return 0; - } -dax_copy: - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); -} - -static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, - pgoff_t pgoff, void *addr, size_t bytes, - struct iov_iter *i) -{ - struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); -} - static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { @@ -1005,8 +929,6 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, #else #define log_writes_dax_direct_access NULL -#define log_writes_dax_copy_from_iter NULL -#define log_writes_dax_copy_to_iter NULL #define log_writes_dax_zero_page_range NULL #endif @@ -1024,8 +946,6 @@ static struct target_type log_writes_target = { .iterate_devices = log_writes_iterate_devices, .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, - .dax_copy_from_iter = log_writes_dax_copy_from_iter, - .dax_copy_to_iter = log_writes_dax_copy_to_iter, .dax_zero_page_range = log_writes_dax_zero_page_range, }; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 50dba3f39274..e566115ec0bb 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -324,22 +324,6 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } -static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); -} - -static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); -} - static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { @@ -350,8 +334,6 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, #else #define stripe_dax_direct_access NULL -#define stripe_dax_copy_from_iter NULL -#define stripe_dax_copy_to_iter NULL #define stripe_dax_zero_page_range NULL #endif @@ -488,8 +470,6 @@ static struct target_type stripe_target = { .iterate_devices = stripe_iterate_devices, .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, - .dax_copy_from_iter = stripe_dax_copy_from_iter, - .dax_copy_to_iter = stripe_dax_copy_to_iter, .dax_zero_page_range = stripe_dax_zero_page_range, }; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f4b972af1092..ce250bd274f3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1027,54 +1027,6 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } -static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - struct mapped_device *md = dax_get_private(dax_dev); - sector_t sector = pgoff * PAGE_SECTORS; - struct dm_target *ti; - long ret = 0; - int srcu_idx; - - ti = dm_dax_get_live_target(md, sector, &srcu_idx); - - if (!ti) - goto out; - if (!ti->type->dax_copy_from_iter) { - ret = copy_from_iter(addr, bytes, i); - goto out; - } - ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i); - out: - dm_put_live_table(md, srcu_idx); - - return ret; -} - -static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - struct mapped_device *md = dax_get_private(dax_dev); - sector_t sector = pgoff * PAGE_SECTORS; - struct dm_target *ti; - long ret = 0; - int srcu_idx; - - ti = dm_dax_get_live_target(md, sector, &srcu_idx); - - if (!ti) - goto out; - if (!ti->type->dax_copy_to_iter) { - ret = copy_to_iter(addr, bytes, i); - goto out; - } - ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i); - out: - dm_put_live_table(md, srcu_idx); - - return ret; -} - static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages) { @@ -1770,6 +1722,8 @@ static struct mapped_device *alloc_dev(int minor) md->dax_dev = NULL; goto bad; } + set_dax_nocache(md->dax_dev); + set_dax_nomc(md->dax_dev); if (dax_add_host(md->dax_dev, md->disk)) goto bad; } @@ -3024,8 +2978,6 @@ static const struct block_device_operations dm_rq_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, - .copy_from_iter = dm_dax_copy_from_iter, - .copy_to_iter = dm_dax_copy_to_iter, .zero_page_range = dm_dax_zero_page_range, }; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 18b1d9c55831..58d95242a836 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -301,26 +301,8 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); } -/* - * Bounds checking, both file offset and device offset, is handled by - * dax_iomap_actor() - */ -static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - return _copy_from_iter_flushcache(addr, bytes, i); -} - -static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - return _copy_mc_to_iter(addr, bytes, i); -} - static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, - .copy_from_iter = pmem_copy_from_iter, - .copy_to_iter = pmem_copy_to_iter, .zero_page_range = pmem_dax_zero_page_range, }; @@ -497,6 +479,8 @@ static int pmem_attach_disk(struct device *dev, rc = PTR_ERR(dax_dev); goto out; } + set_dax_nocache(dax_dev); + set_dax_nomc(dax_dev); if (is_nvdimm_sync(nd_region)) set_dax_synchronous(dax_dev); rc = dax_add_host(dax_dev, disk); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 10823debc09b..d614843caf6c 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -44,18 +44,6 @@ static const struct block_device_operations dcssblk_devops = { .release = dcssblk_release, }; -static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, - pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) -{ - return copy_from_iter(addr, bytes, i); -} - -static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, - pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) -{ - return copy_to_iter(addr, bytes, i); -} - static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages) { @@ -72,8 +60,6 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, - .copy_from_iter = dcssblk_dax_copy_from_iter, - .copy_to_iter = dcssblk_dax_copy_to_iter, .zero_page_range = dcssblk_dax_zero_page_range, }; diff --git a/fs/dax.c b/fs/dax.c index e0eecd8e3a8f..cd03485867a7 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1260,11 +1260,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, if (map_len > end - pos) map_len = end - pos; - /* - * The userspace address for the memory copy has already been - * validated via access_ok() in either vfs_read() or - * vfs_write(), depending on which operation we are doing. - */ if (iov_iter_rw(iter) == WRITE) xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 5c03a0364a9b..3928cd8ceba6 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -753,20 +753,6 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; } -static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, - pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i) -{ - return copy_from_iter(addr, bytes, i); -} - -static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, - pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i) -{ - return copy_to_iter(addr, bytes, i); -} - static int virtio_fs_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages) { @@ -783,8 +769,6 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev, static const struct dax_operations virtio_fs_dax_ops = { .direct_access = virtio_fs_direct_access, - .copy_from_iter = virtio_fs_copy_from_iter, - .copy_to_iter = virtio_fs_copy_to_iter, .zero_page_range = virtio_fs_zero_page_range, }; diff --git a/include/linux/dax.h b/include/linux/dax.h index c04f46478e3b..9fc5f99a0ae2 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -28,12 +28,6 @@ struct dax_operations { */ bool (*dax_supported)(struct dax_device *, struct block_device *, int, sector_t, sector_t); - /* copy_from_iter: required operation for fs-dax direct-i/o */ - size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); - /* copy_to_iter: required operation for fs-dax direct-i/o */ - size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); }; @@ -95,6 +89,9 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, } #endif +void set_dax_nocache(struct dax_device *dax_dev); +void set_dax_nomc(struct dax_device *dax_dev); + struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7df155ea49b..b26fecf6c8e8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); @@ -200,8 +198,6 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; - dm_dax_copy_iter_fn dax_copy_from_iter; - dm_dax_copy_iter_fn dax_copy_to_iter; dm_dax_zero_page_range_fn dax_zero_page_range; /* For internal device-mapper use. */ -- cgit v1.2.3