From c01dafad77fea8d64c4fdca0a6031c980842ad65 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 16 May 2019 12:04:53 -0400 Subject: libnvdimm: Fix compilation warnings with W=1 Several places (dimm_devs.c, core.c etc) include label.h but only label.c uses NSINDEX_SIGNATURE, so move its definition to label.c instead. In file included from drivers/nvdimm/dimm_devs.c:23: drivers/nvdimm/label.h:41:19: warning: 'NSINDEX_SIGNATURE' defined but not used [-Wunused-const-variable=] Also, some places abuse "/**" which is only reserved for the kernel-doc. drivers/nvdimm/bus.c:648: warning: cannot understand function prototype: 'struct attribute_group nd_device_attribute_group = ' drivers/nvdimm/bus.c:677: warning: cannot understand function prototype: 'struct attribute_group nd_numa_attribute_group = ' Those are just some member assignments for the "struct attribute_group" instances and it can't be expressed in the kernel-doc. Reviewed-by: Vishal Verma Signed-off-by: Qian Cai Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 4 ++-- drivers/nvdimm/label.c | 2 ++ drivers/nvdimm/label.h | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7ff684159f29a..2eb6a6cfe9e4c 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -642,7 +642,7 @@ static struct attribute *nd_device_attributes[] = { NULL, }; -/** +/* * nd_device_attribute_group - generic attributes for all devices on an nd bus */ struct attribute_group nd_device_attribute_group = { @@ -671,7 +671,7 @@ static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a, return a->mode; } -/** +/* * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus */ struct attribute_group nd_numa_attribute_group = { diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 2030805aa216c..edf278067e72b 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -25,6 +25,8 @@ static guid_t nvdimm_btt2_guid; static guid_t nvdimm_pfn_guid; static guid_t nvdimm_dax_guid; +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + static u32 best_seq(u32 a, u32 b) { a &= NSINDEX_SEQ_MASK; diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index e9a2ad3c2150b..4bb7add395801 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -38,8 +38,6 @@ enum { ND_NSINDEX_INIT = 0x1, }; -static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; - /** * struct nd_namespace_index - label set superblock * @sig: NAMESPACE_INDEX\0 -- cgit v1.2.3 From 7bf7eac8d648057519adb6fce1e31458c902212c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 16 May 2019 13:26:29 -0700 Subject: dax: Arrange for dax_supported check to span multiple devices Pankaj reports that starting with commit ad428cdb525a "dax: Check the end of the block-device capacity with dax_direct_access()" device-mapper no longer allows dax operation. This results from the stricter checks in __bdev_dax_supported() that validate that the start and end of a block-device map to the same 'pagemap' instance. Teach the dax-core and device-mapper to validate the 'pagemap' on a per-target basis. This is accomplished by refactoring the bdev_dax_supported() internals into generic_fsdax_supported() which takes a sector range to validate. Consequently generic_fsdax_supported() is suitable to be used in a device-mapper ->iterate_devices() callback. A new ->dax_supported() operation is added to allow composite devices to split and route upper-level bdev_dax_supported() requests. Fixes: ad428cdb525a ("dax: Check the end of the block-device...") Cc: Cc: Ira Weiny Cc: Dave Jiang Cc: Keith Busch Cc: Matthew Wilcox Cc: Vishal Verma Cc: Heiko Carstens Cc: Martin Schwidefsky Reviewed-by: Jan Kara Reported-by: Pankaj Gupta Reviewed-by: Pankaj Gupta Tested-by: Pankaj Gupta Tested-by: Vaibhav Jain Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- drivers/dax/super.c | 88 ++++++++++++++++++++++++++++---------------- drivers/md/dm-table.c | 17 ++++++--- drivers/md/dm.c | 20 ++++++++++ drivers/md/dm.h | 1 + drivers/nvdimm/pmem.c | 1 + drivers/s390/block/dcssblk.c | 1 + include/linux/dax.h | 26 +++++++++++++ 7 files changed, 117 insertions(+), 37 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index bbd57ca0634a1..3a7b0a0bf469d 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -73,22 +73,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); #endif -/** - * __bdev_dax_supported() - Check if the device supports dax for filesystem - * @bdev: block device to check - * @blocksize: The block size of the device - * - * This is a library function for filesystems to check if the block device - * can be mounted with dax option. - * - * Return: true if supported, false if unsupported - */ -bool __bdev_dax_supported(struct block_device *bdev, int blocksize) +bool __generic_fsdax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t sectors) { - struct dax_device *dax_dev; bool dax_enabled = false; pgoff_t pgoff, pgoff_end; - struct request_queue *q; char buf[BDEVNAME_SIZE]; void *kaddr, *end_kaddr; pfn_t pfn, end_pfn; @@ -102,21 +92,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } - q = bdev_get_queue(bdev); - if (!q || !blk_queue_dax(q)) { - pr_debug("%s: error: request queue doesn't support dax\n", - bdevname(bdev, buf)); - return false; - } - - err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); + err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff); if (err) { pr_debug("%s: error: unaligned partition for dax\n", bdevname(bdev, buf)); return false; } - last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8; + last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512; err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); if (err) { pr_debug("%s: error: unaligned partition for dax\n", @@ -124,20 +107,11 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } - dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); - if (!dax_dev) { - pr_debug("%s: error: device does not support dax\n", - bdevname(bdev, buf)); - return false; - } - id = dax_read_lock(); len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); dax_read_unlock(id); - put_dax(dax_dev); - if (len < 1 || len2 < 1) { pr_debug("%s: error: dax access failed (%ld)\n", bdevname(bdev, buf), len < 1 ? len : len2); @@ -178,6 +152,49 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) } return true; } +EXPORT_SYMBOL_GPL(__generic_fsdax_supported); + +/** + * __bdev_dax_supported() - Check if the device supports dax for filesystem + * @bdev: block device to check + * @blocksize: The block size of the device + * + * This is a library function for filesystems to check if the block device + * can be mounted with dax option. + * + * Return: true if supported, false if unsupported + */ +bool __bdev_dax_supported(struct block_device *bdev, int blocksize) +{ + struct dax_device *dax_dev; + struct request_queue *q; + char buf[BDEVNAME_SIZE]; + bool ret; + int id; + + q = bdev_get_queue(bdev); + if (!q || !blk_queue_dax(q)) { + pr_debug("%s: error: request queue doesn't support dax\n", + bdevname(bdev, buf)); + return false; + } + + dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + if (!dax_dev) { + pr_debug("%s: error: device does not support dax\n", + bdevname(bdev, buf)); + return false; + } + + id = dax_read_lock(); + ret = dax_supported(dax_dev, bdev, blocksize, 0, + i_size_read(bdev->bd_inode) / 512); + dax_read_unlock(id); + + put_dax(dax_dev); + + return ret; +} EXPORT_SYMBOL_GPL(__bdev_dax_supported); #endif @@ -303,6 +320,15 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, } EXPORT_SYMBOL_GPL(dax_direct_access); +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len) +{ + if (!dax_alive(dax_dev)) + return false; + + return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len); +} + size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index cde3b49b2a910..350cf04514562 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -880,13 +880,17 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type) } EXPORT_SYMBOL_GPL(dm_table_set_type); +/* validate the dax capability of the target device span */ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) + sector_t start, sector_t len, void *data) { - return bdev_dax_supported(dev->bdev, PAGE_SIZE); + int blocksize = *(int *) data; + + return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize, + start, len); } -static bool dm_table_supports_dax(struct dm_table *t) +bool dm_table_supports_dax(struct dm_table *t, int blocksize) { struct dm_target *ti; unsigned i; @@ -899,7 +903,8 @@ static bool dm_table_supports_dax(struct dm_table *t) return false; if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_supports_dax, NULL)) + !ti->type->iterate_devices(ti, device_supports_dax, + &blocksize)) return false; } @@ -979,7 +984,7 @@ static int dm_table_determine_type(struct dm_table *t) verify_bio_based: /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; - if (dm_table_supports_dax(t) || + if (dm_table_supports_dax(t, PAGE_SIZE) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; } else { @@ -1905,7 +1910,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); - if (dm_table_supports_dax(t)) + if (dm_table_supports_dax(t, PAGE_SIZE)) blk_queue_flag_set(QUEUE_FLAG_DAX, q); else blk_queue_flag_clear(QUEUE_FLAG_DAX, q); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1fb1333fefec1..b7c0ad01084dd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1107,6 +1107,25 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len) +{ + struct mapped_device *md = dax_get_private(dax_dev); + struct dm_table *map; + int srcu_idx; + bool ret; + + map = dm_get_live_table(md, &srcu_idx); + if (!map) + return false; + + ret = dm_table_supports_dax(map, blocksize); + + dm_put_live_table(md, srcu_idx); + + return ret; +} + static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { @@ -3192,6 +3211,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, + .dax_supported = dm_dax_supported, .copy_from_iter = dm_dax_copy_from_iter, .copy_to_iter = dm_dax_copy_to_iter, }; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 2d539b82ec08f..17e3db54404ca 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -72,6 +72,7 @@ bool dm_table_bio_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); +bool dm_table_supports_dax(struct dm_table *t, int blocksize); void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0279eb1da3ef5..845c5b430cdd6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -295,6 +295,7 @@ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, + .dax_supported = generic_fsdax_supported, .copy_from_iter = pmem_copy_from_iter, .copy_to_iter = pmem_copy_to_iter, }; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 4e8aedd50cb0d..d04d4378ca50f 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -59,6 +59,7 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, + .dax_supported = generic_fsdax_supported, .copy_from_iter = dcssblk_dax_copy_from_iter, .copy_to_iter = dcssblk_dax_copy_to_iter, }; diff --git a/include/linux/dax.h b/include/linux/dax.h index 0dd316a74a295..becaea5f44888 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -19,6 +19,12 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); + /* + * Validate whether this device is usable as an fsdax backing + * device. + */ + bool (*dax_supported)(struct dax_device *, struct block_device *, int, + sector_t, sector_t); /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); @@ -75,6 +81,17 @@ static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) return __bdev_dax_supported(bdev, blocksize); } +bool __generic_fsdax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t sectors); +static inline bool generic_fsdax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t sectors) +{ + return __generic_fsdax_supported(dax_dev, bdev, blocksize, start, + sectors); +} + static inline struct dax_device *fs_dax_get_by_host(const char *host) { return dax_get_by_host(host); @@ -99,6 +116,13 @@ static inline bool bdev_dax_supported(struct block_device *bdev, return false; } +static inline bool generic_fsdax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t sectors) +{ + return false; +} + static inline struct dax_device *fs_dax_get_by_host(const char *host) { return NULL; @@ -142,6 +166,8 @@ bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, -- cgit v1.2.3 From 52f476a323f9efc959be1c890d0cdcf12e1582e0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 16 May 2019 17:05:21 -0700 Subject: libnvdimm/pmem: Bypass CONFIG_HARDENED_USERCOPY overhead Jeff discovered that performance improves from ~375K iops to ~519K iops on a simple psync-write fio workload when moving the location of 'struct page' from the default PMEM location to DRAM. This result is surprising because the expectation is that 'struct page' for dax is only needed for third party references to dax mappings. For example, a dax-mapped buffer passed to another system call for direct-I/O requires 'struct page' for sending the request down the driver stack and pinning the page. There is no usage of 'struct page' for first party access to a file via read(2)/write(2) and friends. However, this "no page needed" expectation is violated by CONFIG_HARDENED_USERCOPY and the check_copy_size() performed in copy_from_iter_full_nocache() and copy_to_iter_mcsafe(). The check_heap_object() helper routine assumes the buffer is backed by a slab allocator (DRAM) page and applies some checks. Those checks are invalid, dax pages do not originate from the slab, and redundant, dax_iomap_actor() has already validated that the I/O is within bounds. Specifically that routine validates that the logical file offset is within bounds of the file, then it does a sector-to-pfn translation which validates that the physical mapping is within bounds of the block device. Bypass additional hardened usercopy overhead and call the 'no check' versions of the copy_{to,from}_iter operations directly. Fixes: 0aed55af8834 ("x86, uaccess: introduce copy_from_iter_flushcache...") Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Al Viro Cc: Thomas Gleixner Cc: Matthew Wilcox Reported-and-tested-by: Jeff Smits Acked-by: Kees Cook Acked-by: Jan Kara Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 845c5b430cdd6..d9d845077b8be 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -281,16 +281,22 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); } +/* + * Use the 'no check' versions of copy_from_iter_flushcache() and + * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds + * checking, both file offset and device offset, is handled by + * dax_iomap_actor() + */ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - return copy_from_iter_flushcache(addr, bytes, i); + return _copy_from_iter_flushcache(addr, bytes, i); } static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - return copy_to_iter_mcsafe(addr, bytes, i); + return _copy_to_iter_mcsafe(addr, bytes, i); } static const struct dax_operations pmem_dax_ops = { -- cgit v1.2.3