From 51cf784c42d07fbd62cb604836a9270cf3361509 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 12 Jul 2017 17:58:21 -0700 Subject: device-dax: Start defining a dax bus model Towards eliminating the dax_class, move the dax-device-attribute enabling to a new bus.c file in the core. The amount of code thrash of subsequent patches is reduced as no logic changes are made, just pure code movement. A temporary export of unregister_dev_dax() and dax_attribute_groups is needed to preserve compilation, but those symbols become static again in a follow-on patch. Signed-off-by: Dan Williams --- drivers/dax/bus.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 drivers/dax/bus.c (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c new file mode 100644 index 0000000000000..8a398e8e1956f --- /dev/null +++ b/drivers/dax/bus.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include "dax-private.h" +#include "bus.h" + +/* + * Rely on the fact that drvdata is set before the attributes are + * registered, and that the attributes are unregistered before drvdata + * is cleared to assume that drvdata is always valid. 
+ */ +static ssize_t id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_region *dax_region = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", dax_region->id); +} +static DEVICE_ATTR_RO(id); + +static ssize_t region_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_region *dax_region = dev_get_drvdata(dev); + + return sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&dax_region->res)); +} +static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, + region_size_show, NULL); + +static ssize_t align_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_region *dax_region = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", dax_region->align); +} +static DEVICE_ATTR_RO(align); + +static struct attribute *dax_region_attributes[] = { + &dev_attr_region_size.attr, + &dev_attr_align.attr, + &dev_attr_id.attr, + NULL, +}; + +static const struct attribute_group dax_region_attribute_group = { + .name = "dax_region", + .attrs = dax_region_attributes, +}; + +static const struct attribute_group *dax_region_attribute_groups[] = { + &dax_region_attribute_group, + NULL, +}; + +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + +static void dax_region_unregister(void *region) +{ + struct dax_region *dax_region = region; + + sysfs_remove_groups(&dax_region->dev->kobj, + dax_region_attribute_groups); + dax_region_put(dax_region); +} + +struct dax_region *alloc_dax_region(struct device *parent, int region_id, + struct resource *res, unsigned int align, + unsigned long pfn_flags) +{ + struct dax_region *dax_region; + + /* + * The DAX core assumes that it can store its private data in + * 
parent->driver_data. This WARN is a reminder / safeguard for + * developers of device-dax drivers. + */ + if (dev_get_drvdata(parent)) { + dev_WARN(parent, "dax core failed to setup private data\n"); + return NULL; + } + + if (!IS_ALIGNED(res->start, align) + || !IS_ALIGNED(resource_size(res), align)) + return NULL; + + dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); + if (!dax_region) + return NULL; + + dev_set_drvdata(parent, dax_region); + memcpy(&dax_region->res, res, sizeof(*res)); + dax_region->pfn_flags = pfn_flags; + kref_init(&dax_region->kref); + dax_region->id = region_id; + dax_region->align = align; + dax_region->dev = parent; + if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { + kfree(dax_region); + return NULL; + } + + kref_get(&dax_region->kref); + if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) + return NULL; + return dax_region; +} +EXPORT_SYMBOL_GPL(alloc_dax_region); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_dax *dev_dax = to_dev_dax(dev); + unsigned long long size = resource_size(&dev_dax->region->res); + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static struct attribute *dev_dax_attributes[] = { + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group dev_dax_attribute_group = { + .attrs = dev_dax_attributes, +}; + +const struct attribute_group *dax_attribute_groups[] = { + &dev_dax_attribute_group, + NULL, +}; +EXPORT_SYMBOL_GPL(dax_attribute_groups); + +void kill_dev_dax(struct dev_dax *dev_dax) +{ + struct dax_device *dax_dev = dev_dax->dax_dev; + struct inode *inode = dax_inode(dax_dev); + + kill_dax(dax_dev); + unmap_mapping_range(inode->i_mapping, 0, 0, 1); +} +EXPORT_SYMBOL_GPL(kill_dev_dax); + +void unregister_dev_dax(void *dev) +{ + struct dev_dax *dev_dax = to_dev_dax(dev); + struct dax_device *dax_dev = dev_dax->dax_dev; + struct inode *inode = dax_inode(dax_dev); + 
struct cdev *cdev = inode->i_cdev; + + dev_dbg(dev, "trace\n"); + + kill_dev_dax(dev_dax); + cdev_device_del(cdev, dev); + put_device(dev); +} +EXPORT_SYMBOL_GPL(unregister_dev_dax); -- cgit v1.2.3 From 9567da0b408a2553d32ca83cba4f1fc5a8aad459 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 12 Jul 2017 17:58:21 -0700 Subject: device-dax: Introduce bus + driver model In support of multiple device-dax instances per device-dax-region and allowing the 'kmem' driver to attach to dax-instances instead of the current device-node access, convert the dax sub-system from a class to a bus. Recall that the kmem driver takes reserved / special purpose memories and assigns them to be managed by the core-mm. Aside from the fact the device-dax instances are registered and probed on a bus, two other lifetime-management changes are made: 1/ Delay attaching a cdev until driver probe time 2/ A new run_dax() helper is introduced to allow restoring dax-operation after a kill_dax() event. So, at driver ->probe() time we run_dax() and at ->remove() time we kill_dax() and invalidate all mappings. Signed-off-by: Dan Williams --- drivers/dax/bus.c | 133 +++++++++++++++++++++++++++++++++++++++++++--- drivers/dax/bus.h | 16 ++++++ drivers/dax/dax-private.h | 6 +-- drivers/dax/device.c | 95 +++++++++++---------------------- drivers/dax/super.c | 40 +++++++++----- 5 files changed, 203 insertions(+), 87 deletions(-) (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 8a398e8e1956f..0cff32102c4c8 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -6,6 +6,33 @@ #include "dax-private.h" #include "bus.h" +static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + /* + * We only ever expect to handle device-dax instances, i.e. 
the + * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero + */ + return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); +} + +static int dax_bus_match(struct device *dev, struct device_driver *drv); + +static struct bus_type dax_bus_type = { + .name = "dax", + .uevent = dax_bus_uevent, + .match = dax_bus_match, +}; + +static int dax_bus_match(struct device *dev, struct device_driver *drv) +{ + /* + * The drivers that can register on the 'dax' bus are private to + * drivers/dax/ so any device and driver on the bus always + * match. + */ + return 1; +} + /* * Rely on the fact that drvdata is set before the attributes are * registered, and that the attributes are unregistered before drvdata @@ -142,11 +169,10 @@ static const struct attribute_group dev_dax_attribute_group = { .attrs = dev_dax_attributes, }; -const struct attribute_group *dax_attribute_groups[] = { +static const struct attribute_group *dax_attribute_groups[] = { &dev_dax_attribute_group, NULL, }; -EXPORT_SYMBOL_GPL(dax_attribute_groups); void kill_dev_dax(struct dev_dax *dev_dax) { @@ -158,17 +184,108 @@ void kill_dev_dax(struct dev_dax *dev_dax) } EXPORT_SYMBOL_GPL(kill_dev_dax); -void unregister_dev_dax(void *dev) +static void dev_dax_release(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); + struct dax_region *dax_region = dev_dax->region; struct dax_device *dax_dev = dev_dax->dax_dev; - struct inode *inode = dax_inode(dax_dev); - struct cdev *cdev = inode->i_cdev; - dev_dbg(dev, "trace\n"); + dax_region_put(dax_region); + put_dax(dax_dev); + kfree(dev_dax); +} + +static void unregister_dev_dax(void *dev) +{ + struct dev_dax *dev_dax = to_dev_dax(dev); + + dev_dbg(dev, "%s\n", __func__); kill_dev_dax(dev_dax); - cdev_device_del(cdev, dev); + device_del(dev); put_device(dev); } -EXPORT_SYMBOL_GPL(unregister_dev_dax); + +struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id) +{ + struct device *parent = dax_region->dev; + struct dax_device 
*dax_dev; + struct dev_dax *dev_dax; + struct inode *inode; + struct device *dev; + int rc = -ENOMEM; + + if (id < 0) + return ERR_PTR(-EINVAL); + + dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL); + if (!dev_dax) + return ERR_PTR(-ENOMEM); + + /* + * No 'host' or dax_operations since there is no access to this + * device outside of mmap of the resulting character device. + */ + dax_dev = alloc_dax(dev_dax, NULL, NULL); + if (!dax_dev) + goto err; + + /* a device_dax instance is dead while the driver is not attached */ + kill_dax(dax_dev); + + /* from here on we're committed to teardown via dax_dev_release() */ + dev = &dev_dax->dev; + device_initialize(dev); + + dev_dax->dax_dev = dax_dev; + dev_dax->region = dax_region; + kref_get(&dax_region->kref); + + inode = dax_inode(dax_dev); + dev->devt = inode->i_rdev; + dev->bus = &dax_bus_type; + dev->parent = parent; + dev->groups = dax_attribute_groups; + dev->release = dev_dax_release; + dev_set_name(dev, "dax%d.%d", dax_region->id, id); + + rc = device_add(dev); + if (rc) { + kill_dev_dax(dev_dax); + put_device(dev); + return ERR_PTR(rc); + } + + rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); + if (rc) + return ERR_PTR(rc); + + return dev_dax; + + err: + kfree(dev_dax); + + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(devm_create_dev_dax); + +int __dax_driver_register(struct device_driver *drv, + struct module *module, const char *mod_name) +{ + drv->owner = module; + drv->name = mod_name; + drv->mod_name = mod_name; + drv->bus = &dax_bus_type; + return driver_register(drv); +} +EXPORT_SYMBOL_GPL(__dax_driver_register); + +int __init dax_bus_init(void) +{ + return bus_register(&dax_bus_type); +} + +void __exit dax_bus_exit(void) +{ + bus_unregister(&dax_bus_type); +} diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 840865aa69e81..ea509504df3aa 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -11,5 +11,21 @@ void dax_region_put(struct dax_region *dax_region); struct dax_region 
*alloc_dax_region(struct device *parent, int region_id, struct resource *res, unsigned int align, unsigned long flags); struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id); +int __dax_driver_register(struct device_driver *drv, + struct module *module, const char *mod_name); +#define dax_driver_register(driver) \ + __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) void kill_dev_dax(struct dev_dax *dev_dax); + +/* + * While run_dax() is potentially a generic operation that could be + * defined in include/linux/dax.h we don't want to grow any users + * outside of drivers/dax/ + */ +void run_dax(struct dax_device *dax_dev); + +#define MODULE_ALIAS_DAX_DEVICE(type) \ + MODULE_ALIAS("dax:t" __stringify(type) "*") +#define DAX_DEVICE_MODALIAS_FMT "dax:t%d" + #endif /* __DAX_BUS_H__ */ diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 620c3f4eefe7d..c3a1217008373 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -20,10 +20,8 @@ struct dax_device; struct dax_device *inode_dax(struct inode *inode); struct inode *dax_inode(struct dax_device *dax_dev); - -/* temporary until devm_create_dax_dev moves to bus.c */ -extern const struct attribute_group *dax_attribute_groups[]; -void unregister_dev_dax(void *dev); +int dax_bus_init(void); +void dax_bus_exit(void); /** * struct dax_region - mapping infrastructure for dax devices diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 1fc375783e0b2..f55829404a241 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -13,8 +13,6 @@ #include "dax-private.h" #include "bus.h" -static struct class *dax_class; - static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, const char *func) { @@ -404,93 +402,64 @@ static const struct file_operations dax_fops = { .mmap_supported_flags = MAP_SYNC, }; -static void dev_dax_release(struct device *dev) +static void dev_dax_cdev_del(void *cdev) { - struct dev_dax *dev_dax = to_dev_dax(dev); 
- struct dax_region *dax_region = dev_dax->region; - struct dax_device *dax_dev = dev_dax->dax_dev; + cdev_del(cdev); +} - dax_region_put(dax_region); - put_dax(dax_dev); - kfree(dev_dax); +static void dev_dax_kill(void *dev_dax) +{ + kill_dev_dax(dev_dax); } -struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id) +static int dev_dax_probe(struct device *dev) { - struct device *parent = dax_region->dev; - struct dax_device *dax_dev; - struct dev_dax *dev_dax; + struct dev_dax *dev_dax = to_dev_dax(dev); + struct dax_device *dax_dev = dev_dax->dax_dev; struct inode *inode; - struct device *dev; struct cdev *cdev; int rc; - dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL); - if (!dev_dax) - return ERR_PTR(-ENOMEM); - - /* - * No 'host' or dax_operations since there is no access to this - * device outside of mmap of the resulting character device. - */ - dax_dev = alloc_dax(dev_dax, NULL, NULL); - if (!dax_dev) { - rc = -ENOMEM; - goto err; - } - - /* from here on we're committed to teardown via dax_dev_release() */ - dev = &dev_dax->dev; - device_initialize(dev); - inode = dax_inode(dax_dev); cdev = inode->i_cdev; cdev_init(cdev, &dax_fops); - cdev->owner = parent->driver->owner; - - dev_dax->dax_dev = dax_dev; - dev_dax->region = dax_region; - kref_get(&dax_region->kref); - - dev->devt = inode->i_rdev; - dev->class = dax_class; - dev->parent = parent; - dev->groups = dax_attribute_groups; - dev->release = dev_dax_release; - dev_set_name(dev, "dax%d.%d", dax_region->id, id); - - rc = cdev_device_add(cdev, dev); - if (rc) { - kill_dev_dax(dev_dax); - put_device(dev); - return ERR_PTR(rc); - } - - rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); + cdev->owner = dev->driver->owner; + cdev_set_parent(cdev, &dev->kobj); + rc = cdev_add(cdev, dev->devt, 1); if (rc) - return ERR_PTR(rc); + return rc; - return dev_dax; + rc = devm_add_action_or_reset(dev, dev_dax_cdev_del, cdev); + if (rc) + return rc; - err: - kfree(dev_dax); + 
run_dax(dax_dev); + return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax); +} - return ERR_PTR(rc); +static int dev_dax_remove(struct device *dev) +{ + /* all probe actions are unwound by devm */ + return 0; } -EXPORT_SYMBOL_GPL(devm_create_dev_dax); + +static struct device_driver device_dax_driver = { + .probe = dev_dax_probe, + .remove = dev_dax_remove, +}; static int __init dax_init(void) { - dax_class = class_create(THIS_MODULE, "dax"); - return PTR_ERR_OR_ZERO(dax_class); + return dax_driver_register(&device_dax_driver); } static void __exit dax_exit(void) { - class_destroy(dax_class); + driver_unregister(&device_dax_driver); } MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL v2"); -subsys_initcall(dax_init); +module_init(dax_init); module_exit(dax_exit); +MODULE_ALIAS_DAX_DEVICE(0); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 0ecc1a2cf1ccd..ccb22d8db3a26 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -366,11 +366,15 @@ void kill_dax(struct dax_device *dax_dev) spin_lock(&dax_host_lock); hlist_del_init(&dax_dev->list); spin_unlock(&dax_host_lock); - - dax_dev->private = NULL; } EXPORT_SYMBOL_GPL(kill_dax); +void run_dax(struct dax_device *dax_dev) +{ + set_bit(DAXDEV_ALIVE, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(run_dax); + static struct inode *dax_alloc_inode(struct super_block *sb) { struct dax_device *dax_dev; @@ -585,6 +589,8 @@ EXPORT_SYMBOL_GPL(dax_inode); void *dax_get_private(struct dax_device *dax_dev) { + if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags)) + return NULL; return dax_dev->private; } EXPORT_SYMBOL_GPL(dax_get_private); @@ -598,7 +604,7 @@ static void init_once(void *_dax_dev) inode_init_once(inode); } -static int __dax_fs_init(void) +static int dax_fs_init(void) { int rc; @@ -630,35 +636,45 @@ static int __dax_fs_init(void) return rc; } -static void __dax_fs_exit(void) +static void dax_fs_exit(void) { kern_unmount(dax_mnt); unregister_filesystem(&dax_fs_type); kmem_cache_destroy(dax_cache); } 
-static int __init dax_fs_init(void) +static int __init dax_core_init(void) { int rc; - rc = __dax_fs_init(); + rc = dax_fs_init(); if (rc) return rc; rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax"); if (rc) - __dax_fs_exit(); - return rc; + goto err_chrdev; + + rc = dax_bus_init(); + if (rc) + goto err_bus; + return 0; + +err_bus: + unregister_chrdev_region(dax_devt, MINORMASK+1); +err_chrdev: + dax_fs_exit(); + return 0; } -static void __exit dax_fs_exit(void) +static void __exit dax_core_exit(void) { unregister_chrdev_region(dax_devt, MINORMASK+1); ida_destroy(&dax_minor_ida); - __dax_fs_exit(); + dax_fs_exit(); } MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL v2"); -subsys_initcall(dax_fs_init); -module_exit(dax_fs_exit); +subsys_initcall(dax_core_init); +module_exit(dax_core_exit); -- cgit v1.2.3 From 89ec9f2cfa36cc5fca2fb445ed221bb9add7b536 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Oct 2018 15:52:42 -0700 Subject: device-dax: Move resource pinning+mapping into the common driver Move the responsibility of calling devm_request_resource() and devm_memremap_pages() into the common device-dax driver. This is another preparatory step to allowing an alternate personality driver for a device-dax range. Signed-off-by: Dan Williams --- drivers/dax/bus.c | 6 +++- drivers/dax/bus.h | 3 +- drivers/dax/dax-private.h | 9 ++++- drivers/dax/device.c | 61 ++++++++++++++++++++++++++++++++ drivers/dax/pmem.c | 90 ++++++++--------------------------------------- 5 files changed, 90 insertions(+), 79 deletions(-) (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 0cff32102c4c8..69aae2cbd45f1 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. 
*/ +#include #include #include #include @@ -206,7 +207,8 @@ static void unregister_dev_dax(void *dev) put_device(dev); } -struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id) +struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, + struct dev_pagemap *pgmap) { struct device *parent = dax_region->dev; struct dax_device *dax_dev; @@ -222,6 +224,8 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id) if (!dev_dax) return ERR_PTR(-ENOMEM); + memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap)); + /* * No 'host' or dax_operations since there is no access to this * device outside of mmap of the resulting character device. diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index ea509504df3aa..e08e0c394983d 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -10,7 +10,8 @@ struct dax_region; void dax_region_put(struct dax_region *dax_region); struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, unsigned int align, unsigned long flags); -struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id); +struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, + struct dev_pagemap *pgmap); int __dax_driver_register(struct device_driver *drv, struct module *module, const char *mod_name); #define dax_driver_register(driver) \ diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index c3a1217008373..a82ce48f58844 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -42,15 +42,22 @@ struct dax_region { }; /** - * struct dev_dax - instance data for a subdivision of a dax region + * struct dev_dax - instance data for a subdivision of a dax region, and + * data while the device is activated in the driver. 
* @region - parent region * @dax_dev - core dax functionality * @dev - device core + * @pgmap - pgmap for memmap setup / lifetime (driver owned) + * @ref: pgmap reference count (driver owned) + * @cmp: @ref final put completion (driver owned) */ struct dev_dax { struct dax_region *region; struct dax_device *dax_dev; struct device dev; + struct dev_pagemap pgmap; + struct percpu_ref ref; + struct completion cmp; }; static inline struct dev_dax *to_dev_dax(struct device *dev) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index f55829404a241..6ad964d7b0779 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */ +#include #include #include #include @@ -13,6 +14,38 @@ #include "dax-private.h" #include "bus.h" +static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref) +{ + return container_of(ref, struct dev_dax, ref); +} + +static void dev_dax_percpu_release(struct percpu_ref *ref) +{ + struct dev_dax *dev_dax = ref_to_dev_dax(ref); + + dev_dbg(&dev_dax->dev, "%s\n", __func__); + complete(&dev_dax->cmp); +} + +static void dev_dax_percpu_exit(void *data) +{ + struct percpu_ref *ref = data; + struct dev_dax *dev_dax = ref_to_dev_dax(ref); + + dev_dbg(&dev_dax->dev, "%s\n", __func__); + wait_for_completion(&dev_dax->cmp); + percpu_ref_exit(ref); +} + +static void dev_dax_percpu_kill(struct percpu_ref *data) +{ + struct percpu_ref *ref = data; + struct dev_dax *dev_dax = ref_to_dev_dax(ref); + + dev_dbg(&dev_dax->dev, "%s\n", __func__); + percpu_ref_kill(ref); +} + static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, const char *func) { @@ -416,10 +449,38 @@ static int dev_dax_probe(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); struct dax_device *dax_dev = dev_dax->dax_dev; + struct resource *res = &dev_dax->region->res; struct inode *inode; struct cdev *cdev; + void *addr; int rc; + /* 1:1 map 
region resource range to device-dax instance range */ + if (!devm_request_mem_region(dev, res->start, resource_size(res), + dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", res); + return -EBUSY; + } + + init_completion(&dev_dax->cmp); + rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0, + GFP_KERNEL); + if (rc) + return rc; + + rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref); + if (rc) + return rc; + + dev_dax->pgmap.ref = &dev_dax->ref; + dev_dax->pgmap.kill = dev_dax_percpu_kill; + addr = devm_memremap_pages(dev, &dev_dax->pgmap); + if (IS_ERR(addr)) { + devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref); + percpu_ref_exit(&dev_dax->ref); + return PTR_ERR(addr); + } + inode = dax_inode(dax_dev); cdev = inode->i_cdev; cdev_init(cdev, &dax_fops); diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index e1b50d8fd1f8a..d3cefa7868ac7 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -18,54 +18,16 @@ #include "../nvdimm/nd.h" #include "bus.h" -struct dax_pmem { - struct device *dev; - struct percpu_ref ref; - struct dev_pagemap pgmap; - struct completion cmp; -}; - -static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref) -{ - return container_of(ref, struct dax_pmem, ref); -} - -static void dax_pmem_percpu_release(struct percpu_ref *ref) -{ - struct dax_pmem *dax_pmem = to_dax_pmem(ref); - - dev_dbg(dax_pmem->dev, "trace\n"); - complete(&dax_pmem->cmp); -} - -static void dax_pmem_percpu_exit(void *data) -{ - struct percpu_ref *ref = data; - struct dax_pmem *dax_pmem = to_dax_pmem(ref); - - dev_dbg(dax_pmem->dev, "trace\n"); - wait_for_completion(&dax_pmem->cmp); - percpu_ref_exit(ref); -} - -static void dax_pmem_percpu_kill(struct percpu_ref *ref) -{ - struct dax_pmem *dax_pmem = to_dax_pmem(ref); - - dev_dbg(dax_pmem->dev, "trace\n"); - percpu_ref_kill(ref); -} - static int dax_pmem_probe(struct device *dev) { - void *addr; struct resource res; int rc, id, region_id; + resource_size_t 
offset; struct nd_pfn_sb *pfn_sb; struct dev_dax *dev_dax; - struct dax_pmem *dax_pmem; struct nd_namespace_io *nsio; struct dax_region *dax_region; + struct dev_pagemap pgmap = { 0 }; struct nd_namespace_common *ndns; struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; @@ -75,61 +37,37 @@ static int dax_pmem_probe(struct device *dev) return PTR_ERR(ndns); nsio = to_nd_namespace_io(&ndns->dev); - dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); - if (!dax_pmem) - return -ENOMEM; - /* parse the 'pfn' info block via ->rw_bytes */ rc = devm_nsio_enable(dev, nsio); if (rc) return rc; - rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap); + rc = nvdimm_setup_pfn(nd_pfn, &pgmap); if (rc) return rc; devm_nsio_disable(dev, nsio); - pfn_sb = nd_pfn->pfn_sb; - - if (!devm_request_mem_region(dev, nsio->res.start, - resource_size(&nsio->res), + /* reserve the metadata area, device-dax will reserve the data */ + pfn_sb = nd_pfn->pfn_sb; + offset = le64_to_cpu(pfn_sb->dataoff); + if (!devm_request_mem_region(dev, nsio->res.start, offset, dev_name(&ndns->dev))) { - dev_warn(dev, "could not reserve region %pR\n", &nsio->res); - return -EBUSY; - } - - dax_pmem->dev = dev; - init_completion(&dax_pmem->cmp); - rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, - GFP_KERNEL); - if (rc) - return rc; - - rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); - if (rc) { - percpu_ref_exit(&dax_pmem->ref); - return rc; - } - - dax_pmem->pgmap.ref = &dax_pmem->ref; - dax_pmem->pgmap.kill = dax_pmem_percpu_kill; - addr = devm_memremap_pages(dev, &dax_pmem->pgmap); - if (IS_ERR(addr)) - return PTR_ERR(addr); - - /* adjust the dax_region resource to the start of data */ - memcpy(&res, &dax_pmem->pgmap.res, sizeof(res)); - res.start += le64_to_cpu(pfn_sb->dataoff); + dev_warn(dev, "could not reserve metadata\n"); + return -EBUSY; + } rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", ®ion_id, &id); if (rc != 2) return -EINVAL; + /* 
adjust the dax_region resource to the start of data */ + memcpy(&res, &pgmap.res, sizeof(res)); + res.start += offset; dax_region = alloc_dax_region(dev, region_id, &res, le32_to_cpu(pfn_sb->align), PFN_DEV|PFN_MAP); if (!dax_region) return -ENOMEM; - dev_dax = devm_create_dev_dax(dax_region, id); + dev_dax = devm_create_dev_dax(dax_region, id, &pgmap); /* child dev_dax instances now own the lifetime of the dax_region */ dax_region_put(dax_region); -- cgit v1.2.3 From d200781ef237a354d918ceff5cee350d88a93d42 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 7 Nov 2018 15:31:23 -0800 Subject: device-dax: Add support for a dax override driver Introduce the 'new_id' concept for enabling a custom device-driver attach policy for dax-bus drivers. The intended use is to have a mechanism for hot-plugging device-dax ranges into the page allocator on-demand. With this in place the default policy of using device-dax for performance differentiated memory can be overridden by user-space policy that can arrange for the memory range to be managed as 'System RAM' with user-defined NUMA and other performance attributes. Signed-off-by: Dan Williams --- drivers/dax/bus.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++-- drivers/dax/bus.h | 10 +++- drivers/dax/device.c | 11 ++-- 3 files changed, 156 insertions(+), 10 deletions(-) (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 69aae2cbd45f1..17af6fbc3be5b 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -2,11 +2,21 @@ /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. 
*/ #include #include +#include +#include #include #include #include "dax-private.h" #include "bus.h" +static DEFINE_MUTEX(dax_bus_lock); + +#define DAX_NAME_LEN 30 +struct dax_id { + struct list_head list; + char dev_name[DAX_NAME_LEN]; +}; + static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env) { /* @@ -16,22 +26,115 @@ static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env) return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); } +static struct dax_device_driver *to_dax_drv(struct device_driver *drv) +{ + return container_of(drv, struct dax_device_driver, drv); +} + +static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv, + const char *dev_name) +{ + struct dax_id *dax_id; + + lockdep_assert_held(&dax_bus_lock); + + list_for_each_entry(dax_id, &dax_drv->ids, list) + if (sysfs_streq(dax_id->dev_name, dev_name)) + return dax_id; + return NULL; +} + +static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev) +{ + int match; + + mutex_lock(&dax_bus_lock); + match = !!__dax_match_id(dax_drv, dev_name(dev)); + mutex_unlock(&dax_bus_lock); + + return match; +} + +static ssize_t do_id_store(struct device_driver *drv, const char *buf, + size_t count, bool add) +{ + struct dax_device_driver *dax_drv = to_dax_drv(drv); + unsigned int region_id, id; + char devname[DAX_NAME_LEN]; + struct dax_id *dax_id; + ssize_t rc = count; + int fields; + + fields = sscanf(buf, "dax%d.%d", ®ion_id, &id); + if (fields != 2) + return -EINVAL; + sprintf(devname, "dax%d.%d", region_id, id); + if (!sysfs_streq(buf, devname)) + return -EINVAL; + + mutex_lock(&dax_bus_lock); + dax_id = __dax_match_id(dax_drv, buf); + if (!dax_id) { + if (add) { + dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL); + if (dax_id) { + strncpy(dax_id->dev_name, buf, DAX_NAME_LEN); + list_add(&dax_id->list, &dax_drv->ids); + } else + rc = -ENOMEM; + } else + /* nothing to remove */; + } else if (!add) { + list_del(&dax_id->list); + 
kfree(dax_id); + } else + /* dax_id already added */; + mutex_unlock(&dax_bus_lock); + return rc; +} + +static ssize_t new_id_store(struct device_driver *drv, const char *buf, + size_t count) +{ + return do_id_store(drv, buf, count, true); +} +static DRIVER_ATTR_WO(new_id); + +static ssize_t remove_id_store(struct device_driver *drv, const char *buf, + size_t count) +{ + return do_id_store(drv, buf, count, false); +} +static DRIVER_ATTR_WO(remove_id); + +static struct attribute *dax_drv_attrs[] = { + &driver_attr_new_id.attr, + &driver_attr_remove_id.attr, + NULL, +}; +ATTRIBUTE_GROUPS(dax_drv); + static int dax_bus_match(struct device *dev, struct device_driver *drv); static struct bus_type dax_bus_type = { .name = "dax", .uevent = dax_bus_uevent, .match = dax_bus_match, + .drv_groups = dax_drv_groups, }; static int dax_bus_match(struct device *dev, struct device_driver *drv) { + struct dax_device_driver *dax_drv = to_dax_drv(drv); + /* - * The drivers that can register on the 'dax' bus are private to - * drivers/dax/ so any device and driver on the bus always - * match. + * All but the 'device-dax' driver, which has 'match_always' + * set, requires an exact id match. 
*/ - return 1; + if (dax_drv->match_always) + return 1; + + return dax_match_id(dax_drv, dev); } /* @@ -273,17 +376,49 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, } EXPORT_SYMBOL_GPL(devm_create_dev_dax); -int __dax_driver_register(struct device_driver *drv, +static int match_always_count; + +int __dax_driver_register(struct dax_device_driver *dax_drv, struct module *module, const char *mod_name) { + struct device_driver *drv = &dax_drv->drv; + int rc = 0; + + INIT_LIST_HEAD(&dax_drv->ids); drv->owner = module; drv->name = mod_name; drv->mod_name = mod_name; drv->bus = &dax_bus_type; + + /* there can only be one default driver */ + mutex_lock(&dax_bus_lock); + match_always_count += dax_drv->match_always; + if (match_always_count > 1) { + match_always_count--; + WARN_ON(1); + rc = -EINVAL; + } + mutex_unlock(&dax_bus_lock); + if (rc) + return rc; return driver_register(drv); } EXPORT_SYMBOL_GPL(__dax_driver_register); +void dax_driver_unregister(struct dax_device_driver *dax_drv) +{ + struct dax_id *dax_id, *_id; + + mutex_lock(&dax_bus_lock); + match_always_count -= dax_drv->match_always; + list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) { + list_del(&dax_id->list); + kfree(dax_id); + } + mutex_unlock(&dax_bus_lock); +} +EXPORT_SYMBOL_GPL(dax_driver_unregister); + int __init dax_bus_init(void) { return bus_register(&dax_bus_type); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index e08e0c394983d..395ab812367c8 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -12,10 +12,18 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, unsigned int align, unsigned long flags); struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, struct dev_pagemap *pgmap); -int __dax_driver_register(struct device_driver *drv, + +struct dax_device_driver { + struct device_driver drv; + struct list_head ids; + int match_always; +}; + +int __dax_driver_register(struct 
dax_device_driver *dax_drv, struct module *module, const char *mod_name); #define dax_driver_register(driver) \ __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); /* diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 6ad964d7b0779..ad3120395f7a2 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -504,9 +504,12 @@ static int dev_dax_remove(struct device *dev) return 0; } -static struct device_driver device_dax_driver = { - .probe = dev_dax_probe, - .remove = dev_dax_remove, +static struct dax_device_driver device_dax_driver = { + .drv = { + .probe = dev_dax_probe, + .remove = dev_dax_remove, + }, + .match_always = 1, }; static int __init dax_init(void) @@ -516,7 +519,7 @@ static int __init dax_init(void) static void __exit dax_exit(void) { - driver_unregister(&device_dax_driver); + dax_driver_unregister(&device_dax_driver); } MODULE_AUTHOR("Intel Corporation"); -- cgit v1.2.3 From 730926c3b0998943654019f00296cf8e3b02277e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 16 Jul 2017 13:51:53 -0700 Subject: device-dax: Add /sys/class/dax backwards compatibility On the expectation that some environments may not upgrade libdaxctl (userspace component that depends on the /sys/class/dax hierarchy), provide a default / legacy dax_pmem_compat driver. The dax_pmem_compat driver implements the original /sys/class/dax sysfs layout rather than /sys/bus/dax. When userspace is upgraded it can blacklist this module and switch to the dax_pmem driver going forward. CONFIG_DEV_DAX_PMEM_COMPAT and supporting code will be deleted according to the dax_pmem entry in Documentation/ABI/obsolete/. 
Signed-off-by: Dan Williams --- Documentation/ABI/obsolete/sysfs-class-dax | 22 ++++++++ drivers/dax/Kconfig | 12 +++- drivers/dax/Makefile | 4 +- drivers/dax/bus.c | 29 ++++++++-- drivers/dax/bus.h | 26 ++++++++- drivers/dax/device.c | 9 ++- drivers/dax/pmem.c | 90 ------------------------------ drivers/dax/pmem/Makefile | 7 +++ drivers/dax/pmem/compat.c | 73 ++++++++++++++++++++++++ drivers/dax/pmem/core.c | 69 +++++++++++++++++++++++ drivers/dax/pmem/pmem.c | 40 +++++++++++++ tools/testing/nvdimm/Kbuild | 6 +- 12 files changed, 283 insertions(+), 104 deletions(-) create mode 100644 Documentation/ABI/obsolete/sysfs-class-dax delete mode 100644 drivers/dax/pmem.c create mode 100644 drivers/dax/pmem/Makefile create mode 100644 drivers/dax/pmem/compat.c create mode 100644 drivers/dax/pmem/core.c create mode 100644 drivers/dax/pmem/pmem.c (limited to 'drivers/dax/bus.c') diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax new file mode 100644 index 0000000000000..2cb9fc5e8bd14 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-class-dax @@ -0,0 +1,22 @@ +What: /sys/class/dax/ +Date: May, 2016 +KernelVersion: v4.7 +Contact: linux-nvdimm@lists.01.org +Description: Device DAX is the device-centric analogue of Filesystem + DAX (CONFIG_FS_DAX). It allows memory ranges to be + allocated and mapped without need of an intervening file + system. Device DAX is strict, precise and predictable. + Specifically this interface: + + 1/ Guarantees fault granularity with respect to a given + page size (pte, pmd, or pud) set at configuration time. + + 2/ Enforces deterministic behavior by being strict about + what fault scenarios are supported. + + The /sys/class/dax/ interface enumerates all the + device-dax instances in the system. The ABI is + deprecated and will be removed after 2020. 
It is + replaced with the DAX bus interface /sys/bus/dax/ where + device-dax instances can be found under + /sys/bus/dax/devices/ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index e0700bf4893a3..6fc96f03920e2 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -23,12 +23,22 @@ config DEV_DAX config DEV_DAX_PMEM tristate "PMEM DAX: direct access to persistent memory" depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX + depends on m # until we can kill DEV_DAX_PMEM_COMPAT default DEV_DAX help Support raw access to persistent memory. Note that this driver consumes memory ranges allocated and exported by the libnvdimm sub-system. - Say Y if unsure + Say M if unsure + +config DEV_DAX_PMEM_COMPAT + tristate "PMEM DAX: support the deprecated /sys/class/dax interface" + depends on DEV_DAX_PMEM + default DEV_DAX_PMEM + help + Older versions of the libdaxctl library expect to find all + device-dax instances under /sys/class/dax. If libdaxctl in + your distribution is older than v58 say M, otherwise say N. 
endif diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile index 658e6b9b1d741..233bbffccbe66 100644 --- a/drivers/dax/Makefile +++ b/drivers/dax/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o -obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o dax-y := super.o dax-y += bus.o -dax_pmem-y := pmem.o device_dax-y := device.o + +obj-y += pmem/ diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 17af6fbc3be5b..5681685002175 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -9,6 +9,8 @@ #include "dax-private.h" #include "bus.h" +static struct class *dax_class; + static DEFINE_MUTEX(dax_bus_lock); #define DAX_NAME_LEN 30 @@ -310,8 +312,8 @@ static void unregister_dev_dax(void *dev) put_device(dev); } -struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, - struct dev_pagemap *pgmap) +struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, + struct dev_pagemap *pgmap, enum dev_dax_subsys subsys) { struct device *parent = dax_region->dev; struct dax_device *dax_dev; @@ -350,7 +352,10 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, inode = dax_inode(dax_dev); dev->devt = inode->i_rdev; - dev->bus = &dax_bus_type; + if (subsys == DEV_DAX_BUS) + dev->bus = &dax_bus_type; + else + dev->class = dax_class; dev->parent = parent; dev->groups = dax_attribute_groups; dev->release = dev_dax_release; @@ -374,7 +379,7 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, return ERR_PTR(rc); } -EXPORT_SYMBOL_GPL(devm_create_dev_dax); +EXPORT_SYMBOL_GPL(__devm_create_dev_dax); static int match_always_count; @@ -407,6 +412,7 @@ EXPORT_SYMBOL_GPL(__dax_driver_register); void dax_driver_unregister(struct dax_device_driver *dax_drv) { + struct device_driver *drv = &dax_drv->drv; struct dax_id *dax_id, *_id; mutex_lock(&dax_bus_lock); @@ -416,15 +422,28 @@ void dax_driver_unregister(struct dax_device_driver 
*dax_drv) kfree(dax_id); } mutex_unlock(&dax_bus_lock); + driver_unregister(drv); } EXPORT_SYMBOL_GPL(dax_driver_unregister); int __init dax_bus_init(void) { - return bus_register(&dax_bus_type); + int rc; + + if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) { + dax_class = class_create(THIS_MODULE, "dax"); + if (IS_ERR(dax_class)) + return PTR_ERR(dax_class); + } + + rc = bus_register(&dax_bus_type); + if (rc) + class_destroy(dax_class); + return rc; } void __exit dax_bus_exit(void) { bus_unregister(&dax_bus_type); + class_destroy(dax_class); } diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 395ab812367c8..ce977552ffb5c 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -2,7 +2,8 @@ /* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */ #ifndef __DAX_BUS_H__ #define __DAX_BUS_H__ -struct device; +#include + struct dev_dax; struct resource; struct dax_device; @@ -10,8 +11,23 @@ struct dax_region; void dax_region_put(struct dax_region *dax_region); struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, unsigned int align, unsigned long flags); -struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, int id, - struct dev_pagemap *pgmap); + +enum dev_dax_subsys { + DEV_DAX_BUS, + DEV_DAX_CLASS, +}; + +struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, + struct dev_pagemap *pgmap, enum dev_dax_subsys subsys); + +static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, + int id, struct dev_pagemap *pgmap) +{ + return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS); +} + +/* to be deleted when DEV_DAX_CLASS is removed */ +struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys); struct dax_device_driver { struct device_driver drv; @@ -26,6 +42,10 @@ int __dax_driver_register(struct dax_device_driver *dax_drv, void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax 
*dev_dax); +#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT) +int dev_dax_probe(struct device *dev); +#endif + /* * While run_dax() is potentially a generic operation that could be * defined in include/linux/dax.h we don't want to grow any users diff --git a/drivers/dax/device.c b/drivers/dax/device.c index ad3120395f7a2..e428468ab6618 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -445,7 +445,7 @@ static void dev_dax_kill(void *dev_dax) kill_dev_dax(dev_dax); } -static int dev_dax_probe(struct device *dev) +int dev_dax_probe(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); struct dax_device *dax_dev = dev_dax->dax_dev; @@ -484,7 +484,11 @@ static int dev_dax_probe(struct device *dev) inode = dax_inode(dax_dev); cdev = inode->i_cdev; cdev_init(cdev, &dax_fops); - cdev->owner = dev->driver->owner; + if (dev->class) { + /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */ + cdev->owner = dev->parent->driver->owner; + } else + cdev->owner = dev->driver->owner; cdev_set_parent(cdev, &dev->kobj); rc = cdev_add(cdev, dev->devt, 1); if (rc) @@ -497,6 +501,7 @@ static int dev_dax_probe(struct device *dev) run_dax(dax_dev); return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax); } +EXPORT_SYMBOL_GPL(dev_dax_probe); static int dev_dax_remove(struct device *dev) { diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c deleted file mode 100644 index d3cefa7868ac7..0000000000000 --- a/drivers/dax/pmem.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- */ -#include -#include -#include -#include -#include "../nvdimm/pfn.h" -#include "../nvdimm/nd.h" -#include "bus.h" - -static int dax_pmem_probe(struct device *dev) -{ - struct resource res; - int rc, id, region_id; - resource_size_t offset; - struct nd_pfn_sb *pfn_sb; - struct dev_dax *dev_dax; - struct nd_namespace_io *nsio; - struct dax_region *dax_region; - struct dev_pagemap pgmap = { 0 }; - struct nd_namespace_common *ndns; - struct nd_dax *nd_dax = to_nd_dax(dev); - struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; - - ndns = nvdimm_namespace_common_probe(dev); - if (IS_ERR(ndns)) - return PTR_ERR(ndns); - nsio = to_nd_namespace_io(&ndns->dev); - - /* parse the 'pfn' info block via ->rw_bytes */ - rc = devm_nsio_enable(dev, nsio); - if (rc) - return rc; - rc = nvdimm_setup_pfn(nd_pfn, &pgmap); - if (rc) - return rc; - devm_nsio_disable(dev, nsio); - - /* reserve the metadata area, device-dax will reserve the data */ - pfn_sb = nd_pfn->pfn_sb; - offset = le64_to_cpu(pfn_sb->dataoff); - if (!devm_request_mem_region(dev, nsio->res.start, offset, - dev_name(&ndns->dev))) { - dev_warn(dev, "could not reserve metadata\n"); - return -EBUSY; - } - - rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id); - if (rc != 2) - return -EINVAL; - - /* adjust the dax_region resource to the start of data */ - memcpy(&res, &pgmap.res, sizeof(res)); - res.start += offset; - dax_region = alloc_dax_region(dev, region_id, &res, - le32_to_cpu(pfn_sb->align), PFN_DEV|PFN_MAP); - if (!dax_region) - return -ENOMEM; - - dev_dax = devm_create_dev_dax(dax_region, id, &pgmap); - - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - - return PTR_ERR_OR_ZERO(dev_dax); -} - -static struct nd_device_driver dax_pmem_driver = { - .probe = dax_pmem_probe, - .drv = { - .name = "dax_pmem", - }, - .type = ND_DRIVER_DAX_PMEM, -}; - -module_nd_driver(dax_pmem_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Intel Corporation"); 
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); diff --git a/drivers/dax/pmem/Makefile b/drivers/dax/pmem/Makefile new file mode 100644 index 0000000000000..e2e79bd3fdcf9 --- /dev/null +++ b/drivers/dax/pmem/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o +obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o + +dax_pmem-y := pmem.o +dax_pmem_core-y := core.o +dax_pmem_compat-y := compat.o diff --git a/drivers/dax/pmem/compat.c b/drivers/dax/pmem/compat.c new file mode 100644 index 0000000000000..d7b15e6f30c5b --- /dev/null +++ b/drivers/dax/pmem/compat.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include +#include "../bus.h" + +/* we need the private definitions to implement compat support */ +#include "../dax-private.h" + +static int dax_pmem_compat_probe(struct device *dev) +{ + struct dev_dax *dev_dax = __dax_pmem_probe(dev, DEV_DAX_CLASS); + int rc; + + if (IS_ERR(dev_dax)) + return PTR_ERR(dev_dax); + + if (!devres_open_group(&dev_dax->dev, dev_dax, GFP_KERNEL)) + return -ENOMEM; + + device_lock(&dev_dax->dev); + rc = dev_dax_probe(&dev_dax->dev); + device_unlock(&dev_dax->dev); + + devres_close_group(&dev_dax->dev, dev_dax); + if (rc) + devres_release_group(&dev_dax->dev, dev_dax); + + return rc; +} + +static int dax_pmem_compat_release(struct device *dev, void *data) +{ + device_lock(dev); + devres_release_group(dev, to_dev_dax(dev)); + device_unlock(dev); + + return 0; +} + +static int dax_pmem_compat_remove(struct device *dev) +{ + device_for_each_child(dev, NULL, dax_pmem_compat_release); + return 0; +} + +static struct nd_device_driver dax_pmem_compat_driver = { + .probe = dax_pmem_compat_probe, + .remove = dax_pmem_compat_remove, + .drv = { + .name = "dax_pmem_compat", + }, + .type = ND_DRIVER_DAX_PMEM, +}; + +static int __init dax_pmem_compat_init(void) +{ + return 
nd_driver_register(&dax_pmem_compat_driver); +} +module_init(dax_pmem_compat_init); + +static void __exit dax_pmem_compat_exit(void) +{ + driver_unregister(&dax_pmem_compat_driver.drv); +} +module_exit(dax_pmem_compat_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c new file mode 100644 index 0000000000000..bdcff1b14e95c --- /dev/null +++ b/drivers/dax/pmem/core.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include "../../nvdimm/pfn.h" +#include "../../nvdimm/nd.h" +#include "../bus.h" + +struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) +{ + struct resource res; + int rc, id, region_id; + resource_size_t offset; + struct nd_pfn_sb *pfn_sb; + struct dev_dax *dev_dax; + struct nd_namespace_io *nsio; + struct dax_region *dax_region; + struct dev_pagemap pgmap = { 0 }; + struct nd_namespace_common *ndns; + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return ERR_CAST(ndns); + nsio = to_nd_namespace_io(&ndns->dev); + + /* parse the 'pfn' info block via ->rw_bytes */ + rc = devm_nsio_enable(dev, nsio); + if (rc) + return ERR_PTR(rc); + rc = nvdimm_setup_pfn(nd_pfn, &pgmap); + if (rc) + return ERR_PTR(rc); + devm_nsio_disable(dev, nsio); + + /* reserve the metadata area, device-dax will reserve the data */ + pfn_sb = nd_pfn->pfn_sb; + offset = le64_to_cpu(pfn_sb->dataoff); + if (!devm_request_mem_region(dev, nsio->res.start, offset, + dev_name(&ndns->dev))) { + dev_warn(dev, "could not reserve metadata\n"); + return ERR_PTR(-EBUSY); + } + + rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id); + if (rc != 2) + return ERR_PTR(-EINVAL); + + /* adjust the dax_region resource to the 
start of data */ + memcpy(&res, &pgmap.res, sizeof(res)); + res.start += offset; + dax_region = alloc_dax_region(dev, region_id, &res, + le32_to_cpu(pfn_sb->align), PFN_DEV|PFN_MAP); + if (!dax_region) + return ERR_PTR(-ENOMEM); + + dev_dax = __devm_create_dev_dax(dax_region, id, &pgmap, subsys); + + /* child dev_dax instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + + return dev_dax; +} +EXPORT_SYMBOL_GPL(__dax_pmem_probe); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/dax/pmem/pmem.c b/drivers/dax/pmem/pmem.c new file mode 100644 index 0000000000000..0ae4238a0ef88 --- /dev/null +++ b/drivers/dax/pmem/pmem.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include +#include "../bus.h" + +static int dax_pmem_probe(struct device *dev) +{ + return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev, DEV_DAX_BUS)); +} + +static struct nd_device_driver dax_pmem_driver = { + .probe = dax_pmem_probe, + .drv = { + .name = "dax_pmem", + }, + .type = ND_DRIVER_DAX_PMEM, +}; + +static int __init dax_pmem_init(void) +{ + return nd_driver_register(&dax_pmem_driver); +} +module_init(dax_pmem_init); + +static void __exit dax_pmem_exit(void) +{ + driver_unregister(&dax_pmem_driver.drv); +} +module_exit(dax_pmem_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +#if !IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT) +/* For compat builds, don't load this module by default */ +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); +#endif diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index c9b500a652d53..e1286d2cdfbf9 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -35,6 +35,8 @@ obj-$(CONFIG_DAX) += dax.o endif obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o 
+obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o nfit-y := $(ACPI_SRC)/core.o nfit-y += $(ACPI_SRC)/intel.o @@ -65,7 +67,9 @@ device_dax-y += dax-dev.o device_dax-y += device_dax_test.o device_dax-y += config_check.o -dax_pmem-y := $(DAX_SRC)/pmem.o +dax_pmem-y := $(DAX_SRC)/pmem/pmem.o +dax_pmem_core-y := $(DAX_SRC)/pmem/core.o +dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o dax_pmem-y += config_check.o libnvdimm-y := $(NVDIMM_SRC)/core.o -- cgit v1.2.3 From 8fc5c73554db0ac18c0c6ac5b2099ab917f83bdf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 9 Nov 2018 12:43:07 -0800 Subject: acpi/nfit, device-dax: Identify differentiated memory with a unique numa-node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persistent memory, as described by the ACPI NFIT (NVDIMM Firmware Interface Table), is the first known instance of a memory range described by a unique "target" proximity domain. Where "initiator" and "target" proximity domains is an approach that the ACPI HMAT (Heterogeneous Memory Attributes Table) uses to describe the unique performance properties of a memory range relative to a given initiator (e.g. CPU or DMA device). Currently the numa-node for a /dev/pmemX block-device or /dev/daxX.Y char-device follows the traditional notion of 'numa-node' where the attribute conveys the closest online numa-node. That numa-node attribute is useful for cpu-binding and memory-binding processes *near* the device. However, when the memory range backing a 'pmem', or 'dax' device is onlined (memory hot-add) the memory-only-numa-node representing that address needs to be differentiated from the set of online nodes. In other words, the numa-node association of the device depends on whether you can bind processes *near* the cpu-numa-node in the offline device-case, or bind process *on* the memory-range directly after the backing address range is onlined. 
Allow for the case that platform firmware describes persistent memory with a unique proximity domain, i.e. when it is distinct from the proximity of DRAM and CPUs that are on the same socket. Plumb the Linux numa-node translation of that proximity through the libnvdimm region device to namespaces that are in device-dax mode. With this in place the proposed kmem driver [1] can optionally discover a unique numa-node number for the address range as it transitions the memory from an offline state managed by a device-driver to an online memory range managed by the core-mm. [1]: https://lore.kernel.org/lkml/20181022201317.8558C1D8@viggo.jf.intel.com Reported-by: Fan Du Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Dave Hansen Cc: Jérôme Glisse Reviewed-by: Yang Shi Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 1 + drivers/acpi/nfit/core.c | 8 ++++++-- drivers/acpi/numa.c | 1 + drivers/dax/bus.c | 4 +++- drivers/dax/bus.h | 3 ++- drivers/dax/dax-private.h | 4 ++++ drivers/dax/pmem/core.c | 4 +++- drivers/nvdimm/e820.c | 1 + drivers/nvdimm/nd.h | 2 +- drivers/nvdimm/of_pmem.c | 1 + drivers/nvdimm/region_devs.c | 1 + include/linux/acpi.h | 5 +++++ include/linux/libnvdimm.h | 1 + 13 files changed, 30 insertions(+), 6 deletions(-) (limited to 'drivers/dax/bus.c') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 7d6457ab5d345..8806ac8226275 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -236,6 +236,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; ndr_desc.numa_node = dev_to_node(&p->pdev->dev); + ndr_desc.target_node = ndr_desc.numa_node; ndr_desc.res = &p->res; ndr_desc.of_node = p->dn; ndr_desc.provider_data = p; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 011d3db19c80a..475899974c700 100644 --- 
a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2869,11 +2869,15 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ndr_desc->res = &res; ndr_desc->provider_data = nfit_spa; ndr_desc->attr_groups = acpi_nfit_region_attribute_groups; - if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) + if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) { ndr_desc->numa_node = acpi_map_pxm_to_online_node( spa->proximity_domain); - else + ndr_desc->target_node = acpi_map_pxm_to_node( + spa->proximity_domain); + } else { ndr_desc->numa_node = NUMA_NO_NODE; + ndr_desc->target_node = NUMA_NO_NODE; + } /* * Persistence domain bits are hierarchical, if diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 274699463b4f1..b9d86babb13ae 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -84,6 +84,7 @@ int acpi_map_pxm_to_node(int pxm) return node; } +EXPORT_SYMBOL(acpi_map_pxm_to_node); /** * acpi_map_pxm_to_online_node - Map proximity ID to online node diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 5681685002175..c620ad52d7e5c 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -214,7 +214,7 @@ static void dax_region_unregister(void *region) } struct dax_region *alloc_dax_region(struct device *parent, int region_id, - struct resource *res, unsigned int align, + struct resource *res, int target_node, unsigned int align, unsigned long pfn_flags) { struct dax_region *dax_region; @@ -244,6 +244,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, dax_region->id = region_id; dax_region->align = align; dax_region->dev = parent; + dax_region->target_node = target_node; if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { kfree(dax_region); return NULL; @@ -348,6 +349,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, dev_dax->dax_dev = dax_dev; dev_dax->region = dax_region; + dev_dax->target_node = dax_region->target_node; kref_get(&dax_region->kref); inode = 
dax_inode(dax_dev); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index ce977552ffb5c..8619e32999436 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -10,7 +10,8 @@ struct dax_device; struct dax_region; void dax_region_put(struct dax_region *dax_region); struct dax_region *alloc_dax_region(struct device *parent, int region_id, - struct resource *res, unsigned int align, unsigned long flags); + struct resource *res, int target_node, unsigned int align, + unsigned long flags); enum dev_dax_subsys { DEV_DAX_BUS, diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index a82ce48f58844..a45612148ca02 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -26,6 +26,7 @@ void dax_bus_exit(void); /** * struct dax_region - mapping infrastructure for dax devices * @id: kernel-wide unique region for a memory range + * @target_node: effective numa node if this memory range is onlined * @kref: to pin while other agents have a need to do lookups * @dev: parent device backing this region * @align: allocation and mapping alignment for child dax devices @@ -34,6 +35,7 @@ void dax_bus_exit(void); */ struct dax_region { int id; + int target_node; struct kref kref; struct device *dev; unsigned int align; @@ -46,6 +48,7 @@ struct dax_region { * data while the device is activated in the driver. 
* @region - parent region * @dax_dev - core dax functionality + * @target_node: effective numa node if dev_dax memory range is onlined * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) * @ref: pgmap reference count (driver owned) @@ -54,6 +57,7 @@ struct dax_region { struct dev_dax { struct dax_region *region; struct dax_device *dax_dev; + int target_node; struct device dev; struct dev_pagemap pgmap; struct percpu_ref ref; diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index bdcff1b14e95c..f71019ce06470 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -20,6 +20,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) struct nd_namespace_common *ndns; struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + struct nd_region *nd_region = to_nd_region(dev->parent); ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) @@ -52,7 +53,8 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) memcpy(&res, &pgmap.res, sizeof(res)); res.start += offset; dax_region = alloc_dax_region(dev, region_id, &res, - le32_to_cpu(pfn_sb->align), PFN_DEV|PFN_MAP); + nd_region->target_node, le32_to_cpu(pfn_sb->align), + PFN_DEV|PFN_MAP); if (!dax_region) return ERR_PTR(-ENOMEM); diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c index 521eaf53a52aa..36be9b6191876 100644 --- a/drivers/nvdimm/e820.c +++ b/drivers/nvdimm/e820.c @@ -47,6 +47,7 @@ static int e820_register_one(struct resource *res, void *data) ndr_desc.res = res; ndr_desc.attr_groups = e820_pmem_region_attribute_groups; ndr_desc.numa_node = e820_range_to_nid(res->start); + ndr_desc.target_node = ndr_desc.numa_node; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) return -ENXIO; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index cfde992684e7d..0b3d7595b3cb8 100644 --- a/drivers/nvdimm/nd.h +++ 
b/drivers/nvdimm/nd.h @@ -153,7 +153,7 @@ struct nd_region { u16 ndr_mappings; u64 ndr_size; u64 ndr_start; - int id, num_lanes, ro, numa_node; + int id, num_lanes, ro, numa_node, target_node; void *provider_data; struct kernfs_node *bb_state; struct badblocks bb; diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 0a701837dfc0b..ecaaa27438e25 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -68,6 +68,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; ndr_desc.numa_node = dev_to_node(&pdev->dev); + ndr_desc.target_node = ndr_desc.numa_node; ndr_desc.res = &pdev->resource[i]; ndr_desc.of_node = np; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e2818f94f2928..caf2f3129ccdb 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1065,6 +1065,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->flags = ndr_desc->flags; nd_region->ro = ro; nd_region->numa_node = ndr_desc->numa_node; + nd_region->target_node = ndr_desc->target_node; ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); ida_init(&nd_region->pfn_ida); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 87715f20b69a0..eddf2736e5a61 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -400,12 +400,17 @@ extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_online_node(int pxm); +int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); #else static inline int acpi_map_pxm_to_online_node(int pxm) { return 0; } +static inline int acpi_map_pxm_to_node(int pxm) +{ + return 0; +} static inline int acpi_get_node(acpi_handle handle) { return 0; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 5440f11b0907d..56bc545ad3b25 100644 --- 
a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -128,6 +128,7 @@ struct nd_region_desc { void *provider_data; int num_lanes; int numa_node; + int target_node; unsigned long flags; struct device_node *of_node; }; -- cgit v1.2.3 From 664525b2d84abca1074c9546654ae9689de8a818 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 24 Jan 2019 13:12:04 -0800 Subject: device-dax: Auto-bind device after successful new_id The typical 'new_id' attribute behavior is to immediately attach a device to its driver after a new device-id is added. Implement this behavior for the dax bus. Reported-by: Alexander Duyck Reported-by: Brice Goglin Cc: Dave Hansen Signed-off-by: Dan Williams --- drivers/dax/bus.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index c620ad52d7e5c..a410154d75fb9 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -57,8 +57,13 @@ static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev) return match; } +enum id_action { + ID_REMOVE, + ID_ADD, +}; + static ssize_t do_id_store(struct device_driver *drv, const char *buf, - size_t count, bool add) + size_t count, enum id_action action) { struct dax_device_driver *dax_drv = to_dax_drv(drv); unsigned int region_id, id; @@ -77,7 +82,7 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf, mutex_lock(&dax_bus_lock); dax_id = __dax_match_id(dax_drv, buf); if (!dax_id) { - if (add) { + if (action == ID_ADD) { dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL); if (dax_id) { strncpy(dax_id->dev_name, buf, DAX_NAME_LEN); @@ -86,26 +91,33 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf, rc = -ENOMEM; } else /* nothing to remove */; - } else if (!add) { + } else if (action == ID_REMOVE) { list_del(&dax_id->list); kfree(dax_id); } else /* dax_id already added */; mutex_unlock(&dax_bus_lock); - return rc; + + if (rc < 0) + return 
rc; + if (action == ID_ADD) + rc = driver_attach(drv); + if (rc) + return rc; + return count; } static ssize_t new_id_store(struct device_driver *drv, const char *buf, size_t count) { - return do_id_store(drv, buf, count, true); + return do_id_store(drv, buf, count, ID_ADD); } static DRIVER_ATTR_WO(new_id); static ssize_t remove_id_store(struct device_driver *drv, const char *buf, size_t count) { - return do_id_store(drv, buf, count, false); + return do_id_store(drv, buf, count, ID_REMOVE); } static DRIVER_ATTR_WO(remove_id); -- cgit v1.2.3 From 21c75763a3ae18679e5c4e2260aa9379b073566b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 20 Feb 2019 11:39:36 -0800 Subject: device-dax: Add a 'target_node' attribute The target-node attribute is the Linux numa-node that a device-dax instance may create when it is online. Prior to being online the device's 'numa_node' property reflects the closest online cpu node which is the typical expectation of a device 'numa_node'. Once it is online it becomes its own distinct numa node, i.e. 'target_node'. Export the 'target_node' property to give userspace tooling the ability to predict the effective numa-node from a device-dax instance configured to provide 'System RAM' capacity. 
Cc: Vishal Verma Reported-by: Dave Hansen Signed-off-by: Dan Williams --- drivers/dax/bus.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index a410154d75fb9..28c3324271acd 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -279,13 +279,41 @@ static ssize_t size_show(struct device *dev, } static DEVICE_ATTR_RO(size); +static int dev_dax_target_node(struct dev_dax *dev_dax) +{ + struct dax_region *dax_region = dev_dax->region; + + return dax_region->target_node; +} + +static ssize_t target_node_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_dax *dev_dax = to_dev_dax(dev); + + return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax)); +} +static DEVICE_ATTR_RO(target_node); + +static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct dev_dax *dev_dax = to_dev_dax(dev); + + if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0) + return 0; + return a->mode; +} + static struct attribute *dev_dax_attributes[] = { &dev_attr_size.attr, + &dev_attr_target_node.attr, NULL, }; static const struct attribute_group dev_dax_attribute_group = { .attrs = dev_dax_attributes, + .is_visible = dev_dax_visible, }; static const struct attribute_group *dax_attribute_groups[] = { -- cgit v1.2.3 From c347bd71dcdb2d0ac8b3a771486584dca8c8dd80 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 22 Feb 2019 16:58:54 -0700 Subject: device-dax: Add a 'modalias' attribute to DAX 'bus' devices Add a 'modalias' attribute to devices under the DAX bus so that userspace is able to dynamically load modules as needed. Normally, udev can get the modalias from 'uevent', and that is correctly set up by the DAX bus. 
However other tooling such as 'libndctl' for interacting with drivers/nvdimm/, and 'libdaxctl' for drivers/dax/ can also use the modalias to dynamically load modules via libkmod lookups. The 'nd' bus set up by the libnvdimm subsystem exports a modalias attribute. Imitate this to export the same for the 'dax' bus. Cc: Dave Hansen Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/dax/bus.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/dax/bus.c') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 28c3324271acd..2109cfe80219d 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -295,6 +295,17 @@ static ssize_t target_node_show(struct device *dev, } static DEVICE_ATTR_RO(target_node); +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + /* + * We only ever expect to handle device-dax instances, i.e. the + * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero + */ + return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0); +} +static DEVICE_ATTR_RO(modalias); + static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); @@ -306,6 +317,7 @@ static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) } static struct attribute *dev_dax_attributes[] = { + &dev_attr_modalias.attr, &dev_attr_size.attr, &dev_attr_target_node.attr, NULL, -- cgit v1.2.3