diff options
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r-- | drivers/nvdimm/Kconfig | 2 | ||||
-rw-r--r-- | drivers/nvdimm/blk.c | 11 | ||||
-rw-r--r-- | drivers/nvdimm/btt_devs.c | 3 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 212 | ||||
-rw-r--r-- | drivers/nvdimm/claim.c | 7 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 253 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 5 | ||||
-rw-r--r-- | drivers/nvdimm/e820.c | 1 | ||||
-rw-r--r-- | drivers/nvdimm/nd-core.h | 5 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 10 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 85 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.h | 24 | ||||
-rw-r--r-- | drivers/nvdimm/region.c | 19 | ||||
-rw-r--r-- | drivers/nvdimm/region_devs.c | 154 |
14 files changed, 584 insertions, 207 deletions
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 7c8a3bf..124c243 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -1,6 +1,7 @@ menuconfig LIBNVDIMM tristate "NVDIMM (Non-Volatile Memory Device) Support" depends on PHYS_ADDR_T_64BIT + depends on HAS_IOMEM depends on BLK_DEV help Generic support for non-volatile memory devices including @@ -19,7 +20,6 @@ if LIBNVDIMM config BLK_DEV_PMEM tristate "PMEM: Persistent memory block device support" default LIBNVDIMM - depends on HAS_IOMEM select ND_BTT if BTT select ND_PFN if NVDIMM_PFN help diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 7e262ef..9faaa96 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) q = blk_alloc_queue(GFP_KERNEL); if (!q) return -ENOMEM; - if (devm_add_action(dev, nd_blk_release_queue, q)) { - blk_cleanup_queue(q); + if (devm_add_action_or_reset(dev, nd_blk_release_queue, q)) return -ENOMEM; - } blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); @@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) disk = alloc_disk(0); if (!disk) return -ENOMEM; - if (devm_add_action(dev, nd_blk_release_disk, disk)) { - put_disk(disk); - return -ENOMEM; - } disk->first_minor = 0; disk->fops = &nd_blk_fops; @@ -295,6 +289,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) set_capacity(disk, 0); device_add_disk(dev, disk); + if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) + return -ENOMEM; + if (nsblk_meta_size(nsblk)) { int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk)); diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 816d0da..3fa7919 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -198,8 +198,7 @@ struct device *nd_btt_create(struct nd_region *nd_region) { struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); - if (dev) - __nd_device_register(dev); + __nd_device_register(dev); return dev; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5e4e5c7..458daf9 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -31,6 +31,7 @@ int nvdimm_major; static int nvdimm_bus_major; static struct class *nd_class; +static DEFINE_IDA(nd_ida); static int to_nd_device_type(struct device *dev) { @@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) to_nd_device_type(dev)); } -static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) -{ - struct nd_device_driver *nd_drv = to_nd_device_driver(drv); - - return !!test_bit(to_nd_device_type(dev), &nd_drv->type); -} - static struct module *to_bus_provider(struct device *dev) { /* pin bus providers while regions are enabled */ if (is_nd_pmem(dev) || is_nd_blk(dev)) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - return nvdimm_bus->module; + return nvdimm_bus->nd_desc->module; } return NULL; } @@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +static void nvdimm_bus_shutdown(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nd_device_driver *nd_drv = NULL; + + if (dev->driver) + nd_drv = to_nd_device_driver(dev->driver); + + if (nd_drv && nd_drv->shutdown) { + nd_drv->shutdown(dev); + dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n", + dev->driver->name, dev_name(dev)); + } +} + void nd_device_notify(struct device *dev, enum nvdimm_event event) { device_lock(dev); @@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, } EXPORT_SYMBOL_GPL(nvdimm_clear_poison); +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, .match = nvdimm_bus_match, .probe = nvdimm_bus_probe, .remove = nvdimm_bus_remove, + .shutdown = nvdimm_bus_shutdown, +}; + +static void nvdimm_bus_release(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + ida_simple_remove(&nd_ida, nvdimm_bus->id); + kfree(nvdimm_bus); +} + +static bool is_nvdimm_bus(struct device *dev) +{ + return dev->release == nvdimm_bus_release; +} + +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) +{ + struct device *dev; + + for (dev = nd_dev; dev; dev = dev->parent) + if (is_nvdimm_bus(dev)) + break; + dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); + if (dev) + return to_nvdimm_bus(dev); + return NULL; +} + +struct nvdimm_bus *to_nvdimm_bus(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + WARN_ON(!is_nvdimm_bus(dev)); + return nvdimm_bus; +} +EXPORT_SYMBOL_GPL(to_nvdimm_bus); + +struct nvdimm_bus *nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nd_desc) +{ + struct nvdimm_bus *nvdimm_bus; + int rc; + + nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); + if (!nvdimm_bus) + return NULL; + INIT_LIST_HEAD(&nvdimm_bus->list); + INIT_LIST_HEAD(&nvdimm_bus->mapping_list); + INIT_LIST_HEAD(&nvdimm_bus->poison_list); + init_waitqueue_head(&nvdimm_bus->probe_wait); + nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); + mutex_init(&nvdimm_bus->reconfig_mutex); + if (nvdimm_bus->id < 0) { + kfree(nvdimm_bus); + return NULL; + } + nvdimm_bus->nd_desc = nd_desc; + nvdimm_bus->dev.parent = parent; + nvdimm_bus->dev.release = nvdimm_bus_release; + nvdimm_bus->dev.groups = nd_desc->attr_groups; + nvdimm_bus->dev.bus = &nvdimm_bus_type; + dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); + rc = device_register(&nvdimm_bus->dev); + if (rc) { + dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); + goto err; + } + + return nvdimm_bus; + err: + put_device(&nvdimm_bus->dev); + return NULL; +} +EXPORT_SYMBOL_GPL(nvdimm_bus_register); + +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) +{ + if (!nvdimm_bus) + return; + device_unregister(&nvdimm_bus->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); + +static int child_unregister(struct device *dev, void *data) +{ + /* + * the singular ndctl class device per bus needs to be + * "device_destroy"ed, so skip it here + * + * i.e. remove classless children + */ + if (dev->class) + /* pass */; + else + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +static void free_poison_list(struct list_head *poison_list) +{ + struct nd_poison *pl, *next; + + list_for_each_entry_safe(pl, next, poison_list, list) { + list_del(&pl->list); + kfree(pl); + } + list_del_init(poison_list); +} + +static int nd_bus_remove(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + + mutex_lock(&nvdimm_bus_list_mutex); + list_del_init(&nvdimm_bus->list); + mutex_unlock(&nvdimm_bus_list_mutex); + + nd_synchronize(); + device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + + nvdimm_bus_lock(&nvdimm_bus->dev); + free_poison_list(&nvdimm_bus->poison_list); + nvdimm_bus_unlock(&nvdimm_bus->dev); + + nvdimm_bus_destroy_ndctl(nvdimm_bus); + + return 0; +} + +static int nd_bus_probe(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + int rc; + + rc = nvdimm_bus_create_ndctl(nvdimm_bus); + if (rc) + return rc; + + mutex_lock(&nvdimm_bus_list_mutex); + list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); + mutex_unlock(&nvdimm_bus_list_mutex); + + /* enable bus provider attributes to look up their local context */ + dev_set_drvdata(dev, nvdimm_bus->nd_desc); + + return 0; +} + +static struct nd_device_driver nd_bus_driver = { + .probe = nd_bus_probe, + .remove = nd_bus_remove, + .drv = { + .name = "nd_bus", + .suppress_bind_attrs = true, + .bus = &nvdimm_bus_type, + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + }, }; +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); + + if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver) + return true; + + return !!test_bit(to_nd_device_type(dev), &nd_drv->type); +} + static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); void nd_synchronize(void) @@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, "ndctl%d", nvdimm_bus->id); - if (IS_ERR(dev)) { + if (IS_ERR(dev)) dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", nvdimm_bus->id, PTR_ERR(dev)); - return PTR_ERR(dev); - } - return 0; + return PTR_ERR_OR_ZERO(dev); } void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) @@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void) goto err_class; } + rc = driver_register(&nd_bus_driver.drv); + if (rc) + goto err_nd_bus; + return 0; + err_nd_bus: + class_destroy(nd_class); err_class: unregister_chrdev(nvdimm_major, "dimmctl"); err_dimm_chrdev: @@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void) void nvdimm_bus_exit(void) { + driver_unregister(&nd_bus_driver.drv); class_destroy(nd_class); unregister_chrdev(nvdimm_bus_major, "ndctl"); unregister_chrdev(nvdimm_major, "dimmctl"); bus_unregister(&nvdimm_bus_type); + ida_destroy(&nd_ida); } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 8b2e3c4..d5dc80c 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, return memcpy_from_pmem(buf, nsio->addr + offset, size); } else { memcpy_to_pmem(nsio->addr + offset, buf, size); - wmb_pmem(); + nvdimm_flush(to_nd_region(ndns->dev.parent)); } return 0; @@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) nsio->addr = devm_memremap(dev, res->start, resource_size(res), ARCH_MEMREMAP_PMEM); - if (IS_ERR(nsio->addr)) - return PTR_ERR(nsio->addr); - return 0; + + return PTR_ERR_OR_ZERO(nsio->addr); } EXPORT_SYMBOL_GPL(devm_nsio_enable); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index be89764..715583f 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -20,12 +20,12 @@ #include <linux/ndctl.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/io.h> #include "nd-core.h" #include "nd.h" LIST_HEAD(nvdimm_bus_list); DEFINE_MUTEX(nvdimm_bus_list_mutex); -static DEFINE_IDA(nd_ida); void nvdimm_bus_lock(struct device *dev) { @@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev) } EXPORT_SYMBOL(is_nvdimm_bus_locked); +struct nvdimm_map { + struct nvdimm_bus *nvdimm_bus; + struct list_head list; + resource_size_t offset; + unsigned long flags; + size_t size; + union { + void *mem; + void __iomem *iomem; + }; + struct kref kref; +}; + +static struct nvdimm_map *find_nvdimm_map(struct device *dev, + resource_size_t offset) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_map *nvdimm_map; + + list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list) + if (nvdimm_map->offset == offset) + return nvdimm_map; + return NULL; +} + +static struct nvdimm_map *alloc_nvdimm_map(struct device *dev, + resource_size_t offset, size_t size, unsigned long flags) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_map *nvdimm_map; + + nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL); + if (!nvdimm_map) + return NULL; + + INIT_LIST_HEAD(&nvdimm_map->list); + nvdimm_map->nvdimm_bus = nvdimm_bus; + nvdimm_map->offset = offset; + nvdimm_map->flags = flags; + nvdimm_map->size = size; + kref_init(&nvdimm_map->kref); + + if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) + goto err_request_region; + + if (flags) + nvdimm_map->mem = memremap(offset, size, flags); + else + nvdimm_map->iomem = ioremap(offset, size); + + if (!nvdimm_map->mem) + goto err_map; + + dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!", + __func__); + list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list); + + return nvdimm_map; + + err_map: + release_mem_region(offset, size); + err_request_region: + kfree(nvdimm_map); + return NULL; +} + +static void nvdimm_map_release(struct kref *kref) +{ + struct nvdimm_bus *nvdimm_bus; + struct nvdimm_map *nvdimm_map; + + nvdimm_map = container_of(kref, struct nvdimm_map, kref); + nvdimm_bus = nvdimm_map->nvdimm_bus; + + dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset); + list_del(&nvdimm_map->list); + if (nvdimm_map->flags) + memunmap(nvdimm_map->mem); + else + iounmap(nvdimm_map->iomem); + release_mem_region(nvdimm_map->offset, nvdimm_map->size); + kfree(nvdimm_map); +} + +static void nvdimm_map_put(void *data) +{ + struct nvdimm_map *nvdimm_map = data; + struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus; + + nvdimm_bus_lock(&nvdimm_bus->dev); + kref_put(&nvdimm_map->kref, nvdimm_map_release); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +/** + * devm_nvdimm_memremap - map a resource that is shared across regions + * @dev: device that will own a reference to the shared mapping + * @offset: physical base address of the mapping + * @size: mapping size + * @flags: memremap flags, or, if zero, perform an ioremap instead + */ +void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) +{ + struct nvdimm_map *nvdimm_map; + + nvdimm_bus_lock(dev); + nvdimm_map = find_nvdimm_map(dev, offset); + if (!nvdimm_map) + nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags); + else + kref_get(&nvdimm_map->kref); + nvdimm_bus_unlock(dev); + + if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map)) + return NULL; + + return nvdimm_map->mem; +} +EXPORT_SYMBOL_GPL(devm_nvdimm_memremap); + u64 nd_fletcher64(void *addr, size_t len, bool le) { u32 *buf = addr; @@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le) } EXPORT_SYMBOL_GPL(nd_fletcher64); -static void nvdimm_bus_release(struct device *dev) -{ - struct nvdimm_bus *nvdimm_bus; - - nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); - ida_simple_remove(&nd_ida, nvdimm_bus->id); - kfree(nvdimm_bus); -} - -struct nvdimm_bus *to_nvdimm_bus(struct device *dev) -{ - struct nvdimm_bus *nvdimm_bus; - - nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); - WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); - return nvdimm_bus; -} -EXPORT_SYMBOL_GPL(to_nvdimm_bus); - struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) { /* struct nvdimm_bus definition is private to libnvdimm */ @@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) } EXPORT_SYMBOL_GPL(to_nd_desc); -struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) +struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus) { - struct device *dev; - - for (dev = nd_dev; dev; dev = dev->parent) - if (dev->release == nvdimm_bus_release) - break; - dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); - if (dev) - return to_nvdimm_bus(dev); - return NULL; + /* struct nvdimm_bus definition is private to libnvdimm */ + return &nvdimm_bus->dev; } +EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); static bool is_uuid_sep(char sep) { @@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = { }; EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); -struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nd_desc, struct module *module) -{ - struct nvdimm_bus *nvdimm_bus; - int rc; - - nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); - if (!nvdimm_bus) - return NULL; - INIT_LIST_HEAD(&nvdimm_bus->list); - INIT_LIST_HEAD(&nvdimm_bus->poison_list); - init_waitqueue_head(&nvdimm_bus->probe_wait); - nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); - mutex_init(&nvdimm_bus->reconfig_mutex); - if (nvdimm_bus->id < 0) { - kfree(nvdimm_bus); - return NULL; - } - nvdimm_bus->nd_desc = nd_desc; - nvdimm_bus->module = module; - nvdimm_bus->dev.parent = parent; - nvdimm_bus->dev.release = nvdimm_bus_release; - nvdimm_bus->dev.groups = nd_desc->attr_groups; - dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); - rc = device_register(&nvdimm_bus->dev); - if (rc) { - dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); - goto err; - } - - rc = nvdimm_bus_create_ndctl(nvdimm_bus); - if (rc) - goto err; - - mutex_lock(&nvdimm_bus_list_mutex); - list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); - mutex_unlock(&nvdimm_bus_list_mutex); - - return nvdimm_bus; - err: - put_device(&nvdimm_bus->dev); - return NULL; -} -EXPORT_SYMBOL_GPL(__nvdimm_bus_register); - static void set_badblock(struct badblocks *bb, sector_t s, int num) { dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", @@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) } EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); -static void free_poison_list(struct list_head *poison_list) -{ - struct nd_poison *pl, *next; - - list_for_each_entry_safe(pl, next, poison_list, list) { - list_del(&pl->list); - kfree(pl); - } - list_del_init(poison_list); -} - -static int child_unregister(struct device *dev, void *data) -{ - /* - * the singular ndctl class device per bus needs to be - * "device_destroy"ed, so skip it here - * - * i.e. remove classless children - */ - if (dev->class) - /* pass */; - else - nd_device_unregister(dev, ND_SYNC); - return 0; -} - -void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) -{ - if (!nvdimm_bus) - return; - - mutex_lock(&nvdimm_bus_list_mutex); - list_del_init(&nvdimm_bus->list); - mutex_unlock(&nvdimm_bus_list_mutex); - - nd_synchronize(); - device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); - - nvdimm_bus_lock(&nvdimm_bus->dev); - free_poison_list(&nvdimm_bus->poison_list); - nvdimm_bus_unlock(&nvdimm_bus->dev); - - nvdimm_bus_destroy_ndctl(nvdimm_bus); - - device_unregister(&nvdimm_bus->dev); -} -EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); - #ifdef CONFIG_BLK_DEV_INTEGRITY int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) { @@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) if (meta_size == 0) return 0; - bi.profile = NULL; + memset(&bi, 0, sizeof(bi)); + bi.tuple_size = meta_size; bi.tag_size = meta_size; @@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void) nvdimm_bus_exit(); nd_region_devs_exit(); nvdimm_devs_exit(); - ida_destroy(&nd_ida); } MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index bbde28d..d9bba5e 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long cmd_mask) + unsigned long cmd_mask, int num_flush, + struct resource *flush_wpq) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, nvdimm->provider_data = provider_data; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; + nvdimm->num_flush = num_flush; + nvdimm->flush_wpq = flush_wpq; atomic_set(&nvdimm->busy, 0); dev = &nvdimm->dev; dev_set_name(dev, "nmem%d", nvdimm->id); diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c index 95825b3..11ea901 100644 --- a/drivers/nvdimm/e820.c +++ b/drivers/nvdimm/e820.c @@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev) nd_desc.attr_groups = e820_pmem_attribute_groups; nd_desc.provider_name = "e820"; + nd_desc.module = THIS_MODULE; nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); if (!nvdimm_bus) goto err; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 284cdaa..38ce6bb 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -26,11 +26,11 @@ extern int nvdimm_major; struct nvdimm_bus { struct nvdimm_bus_descriptor *nd_desc; wait_queue_head_t probe_wait; - struct module *module; struct list_head list; struct device dev; int id, probe_active; struct list_head poison_list; + struct list_head mapping_list; struct mutex reconfig_mutex; }; @@ -40,7 +40,8 @@ struct nvdimm { unsigned long cmd_mask; struct device dev; atomic_t busy; - int id; + int id, num_flush; + struct resource *flush_wpq; }; bool is_nvdimm(struct device *dev); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index d0ac93c..4047639 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -49,9 +49,11 @@ struct nvdimm_drvdata { struct kref kref; }; -struct nd_region_namespaces { - int count; - int active; +struct nd_region_data { + int ns_count; + int ns_active; + unsigned int flush_mask; + void __iomem *flush_wpq[0][0]; }; static inline struct nd_namespace_index *to_namespace_index( @@ -119,7 +121,6 @@ struct nd_region { struct nd_blk_region { int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); - void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); void *blk_provider_data; @@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev, } #endif int nd_blk_region_init(struct nd_region *nd_region); +int nd_region_activate(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long *start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 36cb390..b511099 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -29,27 +29,28 @@ #include <linux/slab.h> #include <linux/pmem.h> #include <linux/nd.h> +#include "pmem.h" #include "pfn.h" #include "nd.h" -struct pmem_device { - /* One contiguous memory region per device */ - phys_addr_t phys_addr; - /* when non-zero this device is hosting a 'pfn' instance */ - phys_addr_t data_offset; - u64 pfn_flags; - void __pmem *virt_addr; - /* immutable base size of the namespace */ - size_t size; - /* trim size when namespace capacity has been section aligned */ - u32 pfn_pad; - struct badblocks bb; -}; +static struct device *to_dev(struct pmem_device *pmem) +{ + /* + * nvdimm bus services need a 'dev' parameter, and we record the device + * at init in bb.dev. + */ + return pmem->bb.dev; +} + +static struct nd_region *to_region(struct pmem_device *pmem) +{ + return to_nd_region(to_dev(pmem)->parent); +} static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { - struct device *dev = pmem->bb.dev; + struct device *dev = to_dev(pmem); sector_t sector; long cleared; @@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); if (cleared > 0 && cleared / 512) { - dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, (unsigned long long) sector, cleared / 512, cleared / 512 > 1 ? "s" : ""); badblocks_clear(&pmem->bb, sector, cleared / 512); @@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; - void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + void *pmem_addr = pmem->virt_addr + pmem_off; if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; @@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, return rc; } +/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */ +#ifndef REQ_FLUSH +#define REQ_FLUSH REQ_PREFLUSH +#endif + static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { int rc = 0; @@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) struct bio_vec bvec; struct bvec_iter iter; struct pmem_device *pmem = q->queuedata; + struct nd_region *nd_region = to_region(pmem); + + if (bio->bi_rw & REQ_FLUSH) + nvdimm_flush(nd_region); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { @@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) if (do_acct) nd_iostat_end(bio, start); - if (bio_data_dir(bio)) - wmb_pmem(); + if (bio->bi_rw & REQ_FUA) + nvdimm_flush(nd_region); bio_endio(bio); return BLK_QC_T_NONE; @@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, int rc; rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); - if (rw & WRITE) - wmb_pmem(); /* * The ->rw_page interface is subtle and tricky. The core @@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, return rc; } -static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, pfn_t *pfn, long size) +/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ +__weak long pmem_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, pfn_t *pfn, long size) { struct pmem_device *pmem = bdev->bd_queue->queuedata; resource_size_t offset = sector * 512 + pmem->data_offset; @@ -195,7 +204,7 @@ static void pmem_release_queue(void *q) blk_cleanup_queue(q); } -void pmem_release_disk(void *disk) +static void pmem_release_disk(void *disk) { del_gendisk(disk); put_disk(disk); @@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns) { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(dev->parent); struct vmem_altmap __altmap, *altmap = NULL; struct resource *res = &nsio->res; struct nd_pfn *nd_pfn = NULL; @@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev, dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!arch_has_wmb_pmem()) + if (nvdimm_has_flush(nd_region) < 0) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, res->start, resource_size(res), @@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev, * At release time the queue must be dead before * devm_memremap_pages is unwound */ - if (devm_add_action(dev, pmem_release_queue, q)) { - blk_cleanup_queue(q); + if (devm_add_action_or_reset(dev, pmem_release_queue, q)) return -ENOMEM; - } if (IS_ERR(addr)) return PTR_ERR(addr); - pmem->virt_addr = (void __pmem *) addr; + pmem->virt_addr = addr; + blk_queue_write_cache(q, true, true); blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_max_hw_sectors(q, UINT_MAX); @@ -289,10 +298,6 @@ static int pmem_attach_disk(struct device *dev, disk = alloc_disk_node(0, nid); if (!disk) return -ENOMEM; - if (devm_add_action(dev, pmem_release_disk, disk)) { - put_disk(disk); - return -ENOMEM; - } disk->fops = &pmem_fops; disk->queue = q; @@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev, / 512); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); + nvdimm_badblocks_populate(nd_region, &pmem->bb, res); disk->bb = &pmem->bb; device_add_disk(dev, disk); + + if (devm_add_action_or_reset(dev, pmem_release_disk, disk)) + return -ENOMEM; + revalidate_disk(disk); return 0; @@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev) { if (is_nd_btt(dev)) nvdimm_namespace_detach_btt(to_nd_btt(dev)); + nvdimm_flush(to_nd_region(dev->parent)); + return 0; } +static void nd_pmem_shutdown(struct device *dev) +{ + nvdimm_flush(to_nd_region(dev->parent)); +} + static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) { - struct nd_region *nd_region = to_nd_region(dev->parent); struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_region *nd_region = to_region(pmem); resource_size_t offset = 0, end_trunc = 0; struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; @@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, .notify = nd_pmem_notify, + .shutdown = nd_pmem_shutdown, .drv = { .name = "nd_pmem", }, diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h new file mode 100644 index 0000000..b4ee4f71 --- /dev/null +++ b/drivers/nvdimm/pmem.h @@ -0,0 +1,24 @@ +#ifndef __NVDIMM_PMEM_H__ +#define __NVDIMM_PMEM_H__ +#include <linux/badblocks.h> +#include <linux/types.h> +#include <linux/pfn_t.h> +#include <linux/fs.h> + +long pmem_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, pfn_t *pfn, long size); +/* this definition is in it's own header for tools/testing/nvdimm to consume */ +struct pmem_device { + /* One contiguous memory region per device */ + phys_addr_t phys_addr; + /* when non-zero this device is hosting a 'pfn' instance */ + phys_addr_t data_offset; + u64 pfn_flags; + void *virt_addr; + /* immutable base size of the namespace */ + size_t size; + /* trim size when namespace capacity has been section aligned */ + u32 pfn_pad; + struct badblocks bb; +}; +#endif /* __NVDIMM_PMEM_H__ */ diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 05a9123..8f24177 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev) { int err, rc; static unsigned long once; - struct nd_region_namespaces *num_ns; + struct nd_region_data *ndrd; struct nd_region *nd_region = to_nd_region(dev); if (nd_region->num_lanes > num_online_cpus() @@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev) nd_region->num_lanes); } + rc = nd_region_activate(nd_region); + if (rc) + return rc; + rc = nd_blk_region_init(nd_region); if (rc) return rc; rc = nd_region_register_namespaces(nd_region, &err); - num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); - if (!num_ns) - return -ENOMEM; - if (rc < 0) return rc; - num_ns->active = rc; - num_ns->count = rc + err; - dev_set_drvdata(dev, num_ns); + ndrd = dev_get_drvdata(dev); + ndrd->ns_active = rc; + ndrd->ns_count = rc + err; if (rc && err && rc == err) return -ENODEV; @@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev); + device_for_each_child(dev, NULL, child_unregister); + /* flush attribute readers and disable */ nvdimm_bus_lock(dev); nd_region->ns_seed = NULL; @@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev) dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); - device_for_each_child(dev, NULL, child_unregister); return 0; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 40fcfea..e8d5ba7 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -14,13 +14,97 @@ #include <linux/highmem.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/hash.h> +#include <linux/pmem.h> #include <linux/sort.h> #include <linux/io.h> #include <linux/nd.h> #include "nd-core.h" #include "nd.h" +/* + * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is + * irrelevant. + */ +#include <linux/io-64-nonatomic-hi-lo.h> + static DEFINE_IDA(region_ida); +static DEFINE_PER_CPU(int, flush_idx); + +static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, + struct nd_region_data *ndrd) +{ + int i, j; + + dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), + nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es"); + for (i = 0; i < nvdimm->num_flush; i++) { + struct resource *res = &nvdimm->flush_wpq[i]; + unsigned long pfn = PHYS_PFN(res->start); + void __iomem *flush_page; + + /* check if flush hints share a page */ + for (j = 0; j < i; j++) { + struct resource *res_j = &nvdimm->flush_wpq[j]; + unsigned long pfn_j = PHYS_PFN(res_j->start); + + if (pfn == pfn_j) + break; + } + + if (j < i) + flush_page = (void __iomem *) ((unsigned long) + ndrd->flush_wpq[dimm][j] & PAGE_MASK); + else + flush_page = devm_nvdimm_ioremap(dev, + PHYS_PFN(pfn), PAGE_SIZE); + if (!flush_page) + return -ENXIO; + ndrd->flush_wpq[dimm][i] = flush_page + + (res->start & ~PAGE_MASK); + } + + return 0; +} + +int nd_region_activate(struct nd_region *nd_region) +{ + int i, num_flush = 0; + struct nd_region_data *ndrd; + struct device *dev = &nd_region->dev; + size_t flush_data_size = sizeof(void *); + + nvdimm_bus_lock(&nd_region->dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + /* at least one null hint slot per-dimm for the "no-hint" case */ + flush_data_size += sizeof(void *); + num_flush = min_not_zero(num_flush, nvdimm->num_flush); + if (!nvdimm->num_flush) + continue; + flush_data_size += nvdimm->num_flush * sizeof(void *); + } + nvdimm_bus_unlock(&nd_region->dev); + + ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL); + if (!ndrd) + return -ENOMEM; + dev_set_drvdata(dev, ndrd); + + ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd); + + if (rc) + return rc; + } + + return 0; +} static void nd_region_release(struct device *dev) { @@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size); static ssize_t init_namespaces_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); + struct nd_region_data *ndrd = dev_get_drvdata(dev); ssize_t rc; nvdimm_bus_lock(dev); - if (num_ns) - rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); + if (ndrd) + rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count); else rc = -ENXIO; nvdimm_bus_unlock(dev); @@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, if (is_nd_pmem(dev)) return; - - to_nd_blk_region(dev)->disable(nvdimm_bus, dev); } if (dev->parent && is_nd_blk(dev->parent) && probe) { nd_region = to_nd_region(dev->parent); @@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (ndbr) { nd_region = &ndbr->nd_region; ndbr->enable = ndbr_desc->enable; - ndbr->disable = ndbr_desc->disable; ndbr->do_io = ndbr_desc->do_io; } region_buf = ndbr; @@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, } EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); +/** + * nvdimm_flush - flush any posted write queues between the cpu and pmem media + * @nd_region: blk or interleaved pmem region + */ +void nvdimm_flush(struct nd_region *nd_region) +{ + struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); + int i, idx; + + /* + * Try to encourage some diversity in flush hint addresses + * across cpus assuming a limited number of flush hints. + */ + idx = this_cpu_read(flush_idx); + idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8)); + + /* + * The first wmb() is needed to 'sfence' all previous writes + * such that they are architecturally visible for the platform + * buffer flush. Note that we've already arranged for pmem + * writes to avoid the cache via arch_memcpy_to_pmem(). The + * final wmb() ensures ordering for the NVDIMM flush write. + */ + wmb(); + for (i = 0; i < nd_region->ndr_mappings; i++) + if (ndrd->flush_wpq[i][0]) + writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); + wmb(); +} +EXPORT_SYMBOL_GPL(nvdimm_flush); + +/** + * nvdimm_has_flush - determine write flushing requirements + * @nd_region: blk or interleaved pmem region + * + * Returns 1 if writes require flushing + * Returns 0 if writes do not require flushing + * Returns -ENXIO if flushing capability can not be determined + */ +int nvdimm_has_flush(struct nd_region *nd_region) +{ + struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); + int i; + + /* no nvdimm == flushing capability unknown */ + if (nd_region->ndr_mappings == 0) + return -ENXIO; + + for (i = 0; i < nd_region->ndr_mappings; i++) + /* flush hints present, flushing required */ + if (ndrd->flush_wpq[i][0]) + return 1; + + /* + * The platform defines dimm devices without hints, assume + * platform persistence mechanism like ADR + */ + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_has_flush); + void __exit nd_region_devs_exit(void) { ida_destroy(®ion_ida); |