diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 14:35:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-08 14:35:59 -0700 |
commit | 12f03ee606914317e7e6a0815e53a48205c31dae (patch) | |
tree | f8579bf77d29b3921e1877e0ae12ec65b5ebc738 /drivers/nvdimm | |
parent | d9241b22b58e012f26dd2244508d9f4837402af0 (diff) | |
parent | 004f1afbe199e6ab20805b95aefd83ccd24bc5c7 (diff) | |
download | op-kernel-dev-12f03ee606914317e7e6a0815e53a48205c31dae.zip op-kernel-dev-12f03ee606914317e7e6a0815e53a48205c31dae.tar.gz |
Merge tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"This update has successfully completed a 0day-kbuild run and has
appeared in a linux-next release. The changes outside of the typical
drivers/nvdimm/ and drivers/acpi/nfit.[ch] paths are related to the
removal of IORESOURCE_CACHEABLE, the introduction of memremap(), and
the introduction of ZONE_DEVICE + devm_memremap_pages().
Summary:
- Introduce ZONE_DEVICE and devm_memremap_pages() as a generic
mechanism for adding device-driver-discovered memory regions to the
kernel's direct map.
This facility is used by the pmem driver to enable pfn_to_page()
operations on the page frames returned by DAX ('direct_access' in
'struct block_device_operations').
For now, the 'memmap' allocation for these "device" pages comes
from "System RAM". Support for allocating the memmap from device
memory will arrive in a later kernel.
- Introduce memremap() to replace usages of ioremap_cache() and
ioremap_wt(). memremap() drops the __iomem annotation for these
mappings to memory that do not have i/o side effects. The
replacement of ioremap_cache() with memremap() is limited to the
pmem driver to ease merging the api change in v4.3.
Completion of the conversion is targeted for v4.4.
- Similar to the usage of memcpy_to_pmem() + wmb_pmem() in the pmem
driver, update the VFS DAX implementation and PMEM api to provide
persistence guarantees for kernel operations on a DAX mapping.
- Convert the ACPI NFIT 'BLK' driver to map the block apertures as
cacheable to improve performance.
- Miscellaneous updates and fixes to libnvdimm including support for
issuing "address range scrub" commands, clarifying the optimal
'sector size' of pmem devices, a clarification of the usage of the
ACPI '_STA' (status) property for DIMM devices, and other minor
fixes"
* tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (34 commits)
libnvdimm, pmem: direct map legacy pmem by default
libnvdimm, pmem: 'struct page' for pmem
libnvdimm, pfn: 'struct page' provider infrastructure
x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB
add devm_memremap_pages
mm: ZONE_DEVICE for "device memory"
mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h
dax: drop size parameter to ->direct_access()
nd_blk: change aperture mapping from WC to WB
nvdimm: change to use generic kvfree()
pmem, dax: have direct_access use __pmem annotation
dax: update I/O path to do proper PMEM flushing
pmem: add copy_from_iter_pmem() and clear_pmem()
pmem, x86: clean up conditional pmem includes
pmem: remove layer when calling arch_has_wmb_pmem()
pmem, x86: move x86 PMEM API to new pmem.h header
libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option
pmem: switch to devm_ allocations
devres: add devm_memremap
libnvdimm, btt: write and validate parent_uuid
...
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r-- | drivers/nvdimm/Kconfig | 23 | ||||
-rw-r--r-- | drivers/nvdimm/Makefile | 5 | ||||
-rw-r--r-- | drivers/nvdimm/btt.c | 50 | ||||
-rw-r--r-- | drivers/nvdimm/btt.h | 3 | ||||
-rw-r--r-- | drivers/nvdimm/btt_devs.c | 215 | ||||
-rw-r--r-- | drivers/nvdimm/claim.c | 201 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 5 | ||||
-rw-r--r-- | drivers/nvdimm/e820.c | 87 | ||||
-rw-r--r-- | drivers/nvdimm/namespace_devs.c | 89 | ||||
-rw-r--r-- | drivers/nvdimm/nd-core.h | 9 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 67 | ||||
-rw-r--r-- | drivers/nvdimm/pfn.h | 35 | ||||
-rw-r--r-- | drivers/nvdimm/pfn_devs.c | 337 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 245 | ||||
-rw-r--r-- | drivers/nvdimm/region.c | 2 | ||||
-rw-r--r-- | drivers/nvdimm/region_devs.c | 20 |
16 files changed, 1133 insertions, 260 deletions
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 72226ac..53c1162 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -21,6 +21,7 @@ config BLK_DEV_PMEM default LIBNVDIMM depends on HAS_IOMEM select ND_BTT if BTT + select ND_PFN if NVDIMM_PFN help Memory ranges for PMEM are described by either an NFIT (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a @@ -47,12 +48,16 @@ config ND_BLK (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode capabilities. +config ND_CLAIM + bool + config ND_BTT tristate config BTT bool "BTT: Block Translation Table (atomic sector updates)" default y if LIBNVDIMM + select ND_CLAIM help The Block Translation Table (BTT) provides atomic sector update semantics for persistent memory devices, so that @@ -65,4 +70,22 @@ config BTT Select Y if unsure +config ND_PFN + tristate + +config NVDIMM_PFN + bool "PFN: Map persistent (device) memory" + default LIBNVDIMM + depends on ZONE_DEVICE + select ND_CLAIM + help + Map persistent memory, i.e. advertise it to the memory + management sub-system. By default persistent memory does + not support direct I/O, RDMA, or any other usage that + requires a 'struct page' to mediate an I/O request. This + driver allocates and initializes the infrastructure needed + to support those use cases. + + Select Y if unsure + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 594bb97..ea84d3c 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o nd_pmem-y := pmem.o @@ -9,6 +10,8 @@ nd_btt-y := btt.o nd_blk-y := blk.o +nd_e820-y := e820.o + libnvdimm-y := core.o libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o @@ -17,4 +20,6 @@ libnvdimm-y += region_devs.o libnvdimm-y += region.o libnvdimm-y += namespace_devs.o libnvdimm-y += label.o +libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o +libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 341202e..2542397 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -583,33 +583,6 @@ static void free_arenas(struct btt *btt) } /* - * This function checks if the metadata layout is valid and error free - */ -static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, - u8 *uuid, u32 lbasize) -{ - u64 checksum; - - if (memcmp(super->uuid, uuid, 16)) - return 0; - - checksum = le64_to_cpu(super->checksum); - super->checksum = 0; - if (checksum != nd_btt_sb_checksum(super)) - return 0; - super->checksum = cpu_to_le64(checksum); - - if (lbasize != le32_to_cpu(super->external_lbasize)) - return 0; - - /* TODO: figure out action for this */ - if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) - dev_info(to_dev(arena), "Found arena with an error flag\n"); - - return 1; -} - -/* * This function reads an existing valid btt superblock and * populates the corresponding arena_info struct */ @@ -632,8 +605,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, arena->logoff = arena_off + le64_to_cpu(super->logoff); arena->info2off = arena_off + le64_to_cpu(super->info2off); - arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : - (arena->info2off - arena->infooff + BTT_PG_SIZE); + arena->size = (le64_to_cpu(super->nextoff) > 0) + ? (le64_to_cpu(super->nextoff)) + : (arena->info2off - arena->infooff + BTT_PG_SIZE); arena->flags = le32_to_cpu(super->flags); } @@ -665,8 +639,7 @@ static int discover_arenas(struct btt *btt) if (ret) goto out; - if (!arena_is_valid(arena, super, btt->nd_btt->uuid, - btt->lbasize)) { + if (!nd_btt_arena_is_valid(btt->nd_btt, super)) { if (remaining == btt->rawsize) { btt->init_state = INIT_NOTFOUND; dev_info(to_dev(arena), "No existing arenas\n"); @@ -755,10 +728,13 @@ static int create_arenas(struct btt *btt) * It is only called for an uninitialized arena when a write * to that arena occurs for the first time. */ -static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) +static int btt_arena_write_layout(struct arena_info *arena) { int ret; + u64 sum; struct btt_sb *super; + struct nd_btt *nd_btt = arena->nd_btt; + const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); ret = btt_map_init(arena); if (ret) @@ -773,7 +749,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) return -ENOMEM; strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); - memcpy(super->uuid, uuid, 16); + memcpy(super->uuid, nd_btt->uuid, 16); + memcpy(super->parent_uuid, parent_uuid, 16); super->flags = cpu_to_le32(arena->flags); super->version_major = cpu_to_le16(arena->version_major); super->version_minor = cpu_to_le16(arena->version_minor); @@ -794,7 +771,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) super->info2off = cpu_to_le64(arena->info2off - arena->infooff); super->flags = 0; - super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); + sum = nd_sb_checksum((struct nd_gen_sb *) super); + super->checksum = cpu_to_le64(sum); ret = btt_info_write(arena, super); @@ -813,7 +791,7 @@ static int btt_meta_init(struct btt *btt) mutex_lock(&btt->init_lock); list_for_each_entry(arena, &btt->arena_list, list) { - ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); + ret = btt_arena_write_layout(arena); if (ret) goto unlock; @@ -1447,8 +1425,6 @@ static int __init nd_btt_init(void) { int rc; - BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); - btt_major = register_blkdev(0, "btt"); if (btt_major < 0) return btt_major; diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 75b0d80..b2f8651 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -182,4 +182,7 @@ struct btt { int init_state; int num_arenas; }; + +bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); + #endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 6ac8c0f..59ad54a6 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -21,63 +21,13 @@ #include "btt.h" #include "nd.h" -static void __nd_btt_detach_ndns(struct nd_btt *nd_btt) -{ - struct nd_namespace_common *ndns = nd_btt->ndns; - - dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) - || ndns->claim != &nd_btt->dev, - "%s: invalid claim\n", __func__); - ndns->claim = NULL; - nd_btt->ndns = NULL; - put_device(&ndns->dev); -} - -static void nd_btt_detach_ndns(struct nd_btt *nd_btt) -{ - struct nd_namespace_common *ndns = nd_btt->ndns; - - if (!ndns) - return; - get_device(&ndns->dev); - device_lock(&ndns->dev); - __nd_btt_detach_ndns(nd_btt); - device_unlock(&ndns->dev); - put_device(&ndns->dev); -} - -static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt, - struct nd_namespace_common *ndns) -{ - if (ndns->claim) - return false; - dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) - || nd_btt->ndns, - "%s: invalid claim\n", __func__); - ndns->claim = &nd_btt->dev; - nd_btt->ndns = ndns; - get_device(&ndns->dev); - return true; -} - -static bool nd_btt_attach_ndns(struct nd_btt *nd_btt, - struct nd_namespace_common *ndns) -{ - bool claimed; - - device_lock(&ndns->dev); - claimed = __nd_btt_attach_ndns(nd_btt, ndns); - device_unlock(&ndns->dev); - return claimed; -} - static void nd_btt_release(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_btt *nd_btt = to_nd_btt(dev); dev_dbg(dev, "%s\n", __func__); - nd_btt_detach_ndns(nd_btt); + nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns); ida_simple_remove(&nd_region->btt_ida, nd_btt->id); kfree(nd_btt->uuid); kfree(nd_btt); @@ -172,104 +122,15 @@ static ssize_t namespace_show(struct device *dev, return rc; } -static int namespace_match(struct device *dev, void *data) -{ - char *name = data; - - return strcmp(name, dev_name(dev)) == 0; -} - -static bool is_nd_btt_idle(struct device *dev) -{ - struct nd_region *nd_region = to_nd_region(dev->parent); - struct nd_btt *nd_btt = to_nd_btt(dev); - - if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver) - return false; - return true; -} - -static ssize_t __namespace_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct nd_btt *nd_btt = to_nd_btt(dev); - struct nd_namespace_common *ndns; - struct device *found; - char *name; - - if (dev->driver) { - dev_dbg(dev, "%s: -EBUSY\n", __func__); - return -EBUSY; - } - - name = kstrndup(buf, len, GFP_KERNEL); - if (!name) - return -ENOMEM; - strim(name); - - if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) - /* pass */; - else { - len = -EINVAL; - goto out; - } - - ndns = nd_btt->ndns; - if (strcmp(name, "") == 0) { - /* detach the namespace and destroy / reset the btt device */ - nd_btt_detach_ndns(nd_btt); - if (is_nd_btt_idle(dev)) - nd_device_unregister(dev, ND_ASYNC); - else { - nd_btt->lbasize = 0; - kfree(nd_btt->uuid); - nd_btt->uuid = NULL; - } - goto out; - } else if (ndns) { - dev_dbg(dev, "namespace already set to: %s\n", - dev_name(&ndns->dev)); - len = -EBUSY; - goto out; - } - - found = device_find_child(dev->parent, name, namespace_match); - if (!found) { - dev_dbg(dev, "'%s' not found under %s\n", name, - dev_name(dev->parent)); - len = -ENODEV; - goto out; - } - - ndns = to_ndns(found); - if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { - dev_dbg(dev, "%s too small to host btt\n", name); - len = -ENXIO; - goto out_attach; - } - - WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev)); - if (!nd_btt_attach_ndns(nd_btt, ndns)) { - dev_dbg(dev, "%s already claimed\n", - dev_name(&ndns->dev)); - len = -EBUSY; - } - - out_attach: - put_device(&ndns->dev); /* from device_find_child */ - out: - kfree(name); - return len; -} - static ssize_t namespace_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; nvdimm_bus_lock(dev); device_lock(dev); - rc = __namespace_store(dev, attr, buf, len); + rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); device_unlock(dev); @@ -324,7 +185,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->type = &nd_btt_device_type; dev->groups = nd_btt_attribute_groups; device_initialize(&nd_btt->dev); - if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) { + if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", __func__, dev_name(ndns->claim)); put_device(dev); @@ -342,30 +203,54 @@ struct device *nd_btt_create(struct nd_region *nd_region) return dev; } -/* - * nd_btt_sb_checksum: compute checksum for btt info block +static bool uuid_is_null(u8 *uuid) +{ + static const u8 null_uuid[16]; + + return (memcmp(uuid, null_uuid, 16) == 0); +} + +/** + * nd_btt_arena_is_valid - check if the metadata layout is valid + * @nd_btt: device with BTT geometry and backing device info + * @super: pointer to the arena's info block being tested + * + * Check consistency of the btt info block with itself by validating + * the checksum, and with the parent namespace by verifying the + * parent_uuid contained in the info block with the one supplied in. * - * Returns a fletcher64 checksum of everything in the given info block - * except the last field (since that's where the checksum lives). + * Returns: + * false for an invalid info block, true for a valid one */ -u64 nd_btt_sb_checksum(struct btt_sb *btt_sb) +bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) { - u64 sum; - __le64 sum_save; - - sum_save = btt_sb->checksum; - btt_sb->checksum = 0; - sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1); - btt_sb->checksum = sum_save; - return sum; + const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); + u64 checksum; + + if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0) + return false; + + if (!uuid_is_null(super->parent_uuid)) + if (memcmp(super->parent_uuid, parent_uuid, 16) != 0) + return false; + + checksum = le64_to_cpu(super->checksum); + super->checksum = 0; + if (checksum != nd_sb_checksum((struct nd_gen_sb *) super)) + return false; + super->checksum = cpu_to_le64(checksum); + + /* TODO: figure out action for this */ + if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) + dev_info(&nd_btt->dev, "Found arena with an error flag\n"); + + return true; } -EXPORT_SYMBOL(nd_btt_sb_checksum); +EXPORT_SYMBOL(nd_btt_arena_is_valid); static int __nd_btt_probe(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, struct btt_sb *btt_sb) { - u64 checksum; - if (!btt_sb || !ndns || !nd_btt) return -ENODEV; @@ -375,14 +260,8 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (nvdimm_namespace_capacity(ndns) < SZ_16M) return -ENXIO; - if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0) - return -ENODEV; - - checksum = le64_to_cpu(btt_sb->checksum); - btt_sb->checksum = 0; - if (checksum != nd_btt_sb_checksum(btt_sb)) + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) return -ENODEV; - btt_sb->checksum = cpu_to_le64(checksum); nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); @@ -416,7 +295,9 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__, rc == 0 ? dev_name(dev) : "<none>"); if (rc < 0) { - __nd_btt_detach_ndns(to_nd_btt(dev)); + struct nd_btt *nd_btt = to_nd_btt(dev); + + __nd_detach_ndns(dev, &nd_btt->ndns); put_device(dev); } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c new file mode 100644 index 0000000..e8f03b0 --- /dev/null +++ b/drivers/nvdimm/claim.c @@ -0,0 +1,201 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/device.h> +#include <linux/sizes.h> +#include "nd-core.h" +#include "pfn.h" +#include "btt.h" +#include "nd.h" + +void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) +{ + struct nd_namespace_common *ndns = *_ndns; + + dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex) + || ndns->claim != dev, + "%s: invalid claim\n", __func__); + ndns->claim = NULL; + *_ndns = NULL; + put_device(&ndns->dev); +} + +void nd_detach_ndns(struct device *dev, + struct nd_namespace_common **_ndns) +{ + struct nd_namespace_common *ndns = *_ndns; + + if (!ndns) + return; + get_device(&ndns->dev); + device_lock(&ndns->dev); + __nd_detach_ndns(dev, _ndns); + device_unlock(&ndns->dev); + put_device(&ndns->dev); +} + +bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns) +{ + if (attach->claim) + return false; + dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex) + || *_ndns, + "%s: invalid claim\n", __func__); + attach->claim = dev; + *_ndns = attach; + get_device(&attach->dev); + return true; +} + +bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns) +{ + bool claimed; + + device_lock(&attach->dev); + claimed = __nd_attach_ndns(dev, attach, _ndns); + device_unlock(&attach->dev); + return claimed; +} + +static int namespace_match(struct device *dev, void *data) +{ + char *name = data; + + return strcmp(name, dev_name(dev)) == 0; +} + +static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct device *seed = NULL; + + if (is_nd_btt(dev)) + seed = nd_region->btt_seed; + else if (is_nd_pfn(dev)) + seed = nd_region->pfn_seed; + + if (seed == dev || ndns || dev->driver) + return false; + return true; +} + +static void nd_detach_and_reset(struct device *dev, + struct nd_namespace_common **_ndns) +{ + /* detach the namespace and destroy / reset the device */ + nd_detach_ndns(dev, _ndns); + if (is_idle(dev, *_ndns)) { + nd_device_unregister(dev, ND_ASYNC); + } else if (is_nd_btt(dev)) { + struct nd_btt *nd_btt = to_nd_btt(dev); + + nd_btt->lbasize = 0; + kfree(nd_btt->uuid); + nd_btt->uuid = NULL; + } else if (is_nd_pfn(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + kfree(nd_pfn->uuid); + nd_pfn->uuid = NULL; + nd_pfn->mode = PFN_MODE_NONE; + } +} + +ssize_t nd_namespace_store(struct device *dev, + struct nd_namespace_common **_ndns, const char *buf, + size_t len) +{ + struct nd_namespace_common *ndns; + struct device *found; + char *name; + + if (dev->driver) { + dev_dbg(dev, "%s: -EBUSY\n", __func__); + return -EBUSY; + } + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + strim(name); + + if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) + /* pass */; + else { + len = -EINVAL; + goto out; + } + + ndns = *_ndns; + if (strcmp(name, "") == 0) { + nd_detach_and_reset(dev, _ndns); + goto out; + } else if (ndns) { + dev_dbg(dev, "namespace already set to: %s\n", + dev_name(&ndns->dev)); + len = -EBUSY; + goto out; + } + + found = device_find_child(dev->parent, name, namespace_match); + if (!found) { + dev_dbg(dev, "'%s' not found under %s\n", name, + dev_name(dev->parent)); + len = -ENODEV; + goto out; + } + + ndns = to_ndns(found); + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { + dev_dbg(dev, "%s too small to host\n", name); + len = -ENXIO; + goto out_attach; + } + + WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); + if (!nd_attach_ndns(dev, ndns, _ndns)) { + dev_dbg(dev, "%s already claimed\n", + dev_name(&ndns->dev)); + len = -EBUSY; + } + + out_attach: + put_device(&ndns->dev); /* from device_find_child */ + out: + kfree(name); + return len; +} + +/* + * nd_sb_checksum: compute checksum for a generic info block + * + * Returns a fletcher64 checksum of everything in the given info block + * except the last field (since that's where the checksum lives). + */ +u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) +{ + u64 sum; + __le64 sum_save; + + BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); + BUILD_BUG_ON(sizeof(struct nd_pfn_sb) != SZ_4K); + BUILD_BUG_ON(sizeof(struct nd_gen_sb) != SZ_4K); + + sum_save = nd_gen_sb->checksum; + nd_gen_sb->checksum = 0; + sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1); + nd_gen_sb->checksum = sum_save; + return sum; +} +EXPORT_SYMBOL(nd_sb_checksum); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c05eb80..651b8d1 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -241,10 +241,7 @@ void nvdimm_drvdata_release(struct kref *kref) nvdimm_free_dpa(ndd, res); nvdimm_bus_unlock(dev); - if (ndd->data && is_vmalloc_addr(ndd->data)) - vfree(ndd->data); - else - kfree(ndd->data); + kvfree(ndd->data); kfree(ndd); put_device(dev); } diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c new file mode 100644 index 0000000..8282db2 --- /dev/null +++ b/drivers/nvdimm/e820.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2015, Christoph Hellwig. + * Copyright (c) 2015, Intel Corporation. + */ +#include <linux/platform_device.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> + +static const struct attribute_group *e820_pmem_attribute_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +static const struct attribute_group *e820_pmem_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static int e820_pmem_remove(struct platform_device *pdev) +{ + struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(nvdimm_bus); + return 0; +} + +static int e820_pmem_probe(struct platform_device *pdev) +{ + static struct nvdimm_bus_descriptor nd_desc; + struct device *dev = &pdev->dev; + struct nvdimm_bus *nvdimm_bus; + struct resource *p; + + nd_desc.attr_groups = e820_pmem_attribute_groups; + nd_desc.provider_name = "e820"; + nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); + if (!nvdimm_bus) + goto err; + platform_set_drvdata(pdev, nvdimm_bus); + + for (p = iomem_resource.child; p ; p = p->sibling) { + struct nd_region_desc ndr_desc; + + if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0) + continue; + + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.res = p; + ndr_desc.attr_groups = e820_pmem_region_attribute_groups; + ndr_desc.numa_node = NUMA_NO_NODE; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) + goto err; + } + + return 0; + + err: + nvdimm_bus_unregister(nvdimm_bus); + dev_err(dev, "failed to register legacy persistent memory ranges\n"); + return -ENXIO; +} + +static struct platform_driver e820_pmem_driver = { + .probe = e820_pmem_probe, + .remove = e820_pmem_remove, + .driver = { + .name = "e820_pmem", + }, +}; + +static __init int e820_pmem_init(void) +{ + return platform_driver_register(&e820_pmem_driver); +} + +static __exit void e820_pmem_exit(void) +{ + platform_driver_unregister(&e820_pmem_driver); +} + +MODULE_ALIAS("platform:e820_pmem*"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +module_init(e820_pmem_init); +module_exit(e820_pmem_exit); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index fef0dd8..0955b2c 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/slab.h> +#include <linux/pmem.h> #include <linux/nd.h> #include "nd-core.h" #include "nd.h" @@ -76,22 +77,54 @@ static bool is_namespace_io(struct device *dev) return dev ? dev->type == &namespace_io_device_type : false; } +bool pmem_should_map_pages(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) + return false; + + if (!test_bit(ND_REGION_PAGEMAP, &nd_region->flags)) + return false; + + if (is_nd_pfn(dev) || is_nd_btt(dev)) + return false; + +#ifdef ARCH_MEMREMAP_PMEM + return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; +#else + return false; +#endif +} +EXPORT_SYMBOL(pmem_should_map_pages); + const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name) { struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - const char *suffix = ""; + const char *suffix = NULL; - if (ndns->claim && is_nd_btt(ndns->claim)) - suffix = "s"; + if (ndns->claim) { + if (is_nd_btt(ndns->claim)) + suffix = "s"; + else if (is_nd_pfn(ndns->claim)) + suffix = "m"; + else + dev_WARN_ONCE(&ndns->dev, 1, + "unknown claim type by %s\n", + dev_name(ndns->claim)); + } - if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) - sprintf(name, "pmem%d%s", nd_region->id, suffix); - else if (is_namespace_blk(&ndns->dev)) { + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) { + if (!suffix && pmem_should_map_pages(&ndns->dev)) + suffix = "m"; + sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : ""); + } else if (is_namespace_blk(&ndns->dev)) { struct nd_namespace_blk *nsblk; nsblk = to_nd_namespace_blk(&ndns->dev); - sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, + suffix ? suffix : ""); } else { return NULL; } @@ -100,6 +133,26 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, } EXPORT_SYMBOL(nvdimm_namespace_disk_name); +const u8 *nd_dev_to_uuid(struct device *dev) +{ + static const u8 null_uuid[16]; + + if (!dev) + return null_uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + return nsblk->uuid; + } else + return null_uuid; +} +EXPORT_SYMBOL(nd_dev_to_uuid); + static ssize_t nstype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1235,12 +1288,22 @@ static const struct attribute_group *nd_namespace_attribute_groups[] = { struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) { struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; + struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; struct nd_namespace_common *ndns; resource_size_t size; - if (nd_btt) { - ndns = nd_btt->ndns; - if (!ndns) + if (nd_btt || nd_pfn) { + struct device *host = NULL; + + if (nd_btt) { + host = &nd_btt->dev; + ndns = nd_btt->ndns; + } else if (nd_pfn) { + host = &nd_pfn->dev; + ndns = nd_pfn->ndns; + } + + if (!ndns || !host) return ERR_PTR(-ENODEV); /* @@ -1251,12 +1314,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", - dev_name(&nd_btt->dev)); + dev_name(host)); return ERR_PTR(-EBUSY); } - if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev, + if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host, "host (%s) vs claim (%s) mismatch\n", - dev_name(&nd_btt->dev), + dev_name(host), dev_name(ndns->claim))) return ERR_PTR(-ENXIO); } else { diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index e1970c7..159aed5 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -80,4 +80,13 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region, int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); void get_ndd(struct nvdimm_drvdata *ndd); resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); +void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); +bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns); +bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns); +ssize_t nd_namespace_store(struct device *dev, + struct nd_namespace_common **_ndns, const char *buf, + size_t len); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index c41f53e74..417e521 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -29,6 +29,13 @@ enum { ND_MAX_LANES = 256, SECTOR_SHIFT = 9, INT_LBASIZE_ALIGNMENT = 64, +#if IS_ENABLED(CONFIG_NVDIMM_PFN) + ND_PFN_ALIGN = PAGES_PER_SECTION * PAGE_SIZE, + ND_PFN_MASK = ND_PFN_ALIGN - 1, +#else + ND_PFN_ALIGN = 0, + ND_PFN_MASK = 0, +#endif }; struct nvdimm_drvdata { @@ -92,8 +99,11 @@ struct nd_region { struct device dev; struct ida ns_ida; struct ida btt_ida; + struct ida pfn_ida; + unsigned long flags; struct device *ns_seed; struct device *btt_seed; + struct device *pfn_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -133,6 +143,22 @@ struct nd_btt { int id; }; +enum nd_pfn_mode { + PFN_MODE_NONE, + PFN_MODE_RAM, + PFN_MODE_PMEM, +}; + +struct nd_pfn { + int id; + u8 *uuid; + struct device dev; + unsigned long npfns; + enum nd_pfn_mode mode; + struct nd_pfn_sb *pfn_sb; + struct nd_namespace_common *ndns; +}; + enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -159,14 +185,19 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); struct nd_btt *to_nd_btt(struct device *dev); -struct btt_sb; -u64 nd_btt_sb_checksum(struct btt_sb *btt_sb); + +struct nd_gen_sb { + char reserved[SZ_4K - 8]; + __le64 checksum; +}; + +u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else -static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) { return -ENODEV; } @@ -180,8 +211,36 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) { return NULL; } +#endif +struct nd_pfn *to_nd_pfn(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_PFN) +int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata); +bool is_nd_pfn(struct device *dev); +struct device *nd_pfn_create(struct nd_region *nd_region); +int nd_pfn_validate(struct nd_pfn *nd_pfn); +#else +static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + return -ENODEV; +} + +static inline bool is_nd_pfn(struct device *dev) +{ + return false; +} + +static inline struct device *nd_pfn_create(struct nd_region *nd_region) +{ + return NULL; +} + +static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + return -ENODEV; +} #endif + struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); @@ -217,4 +276,6 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) } void nd_iostat_end(struct bio *bio, unsigned long start); resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); +const u8 *nd_dev_to_uuid(struct device *dev); +bool pmem_should_map_pages(struct device *dev); #endif /* __ND_H__ */ diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h new file mode 100644 index 0000000..cc24375 --- /dev/null +++ b/drivers/nvdimm/pfn.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __NVDIMM_PFN_H +#define __NVDIMM_PFN_H + +#include <linux/types.h> + +#define PFN_SIG_LEN 16 +#define PFN_SIG "NVDIMM_PFN_INFO\0" + +struct nd_pfn_sb { + u8 signature[PFN_SIG_LEN]; + u8 uuid[16]; + u8 parent_uuid[16]; + __le32 flags; + __le16 version_major; + __le16 version_minor; + __le64 dataoff; + __le64 npfns; + __le32 mode; + u8 padding[4012]; + __le64 checksum; +}; +#endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c new file mode 100644 index 0000000..3fd7d0d --- /dev/null +++ b/drivers/nvdimm/pfn_devs.c @@ -0,0 +1,337 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "pfn.h" +#include "nd.h" + +static void nd_pfn_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + dev_dbg(dev, "%s\n", __func__); + nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); + ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); + kfree(nd_pfn->uuid); + kfree(nd_pfn); +} + +static struct device_type nd_pfn_device_type = { + .name = "nd_pfn", + .release = nd_pfn_release, +}; + +bool is_nd_pfn(struct device *dev) +{ + return dev ? dev->type == &nd_pfn_device_type : false; +} +EXPORT_SYMBOL(is_nd_pfn); + +struct nd_pfn *to_nd_pfn(struct device *dev) +{ + struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); + + WARN_ON(!is_nd_pfn(dev)); + return nd_pfn; +} +EXPORT_SYMBOL(to_nd_pfn); + +static ssize_t mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + switch (nd_pfn->mode) { + case PFN_MODE_RAM: + return sprintf(buf, "ram\n"); + case PFN_MODE_PMEM: + return sprintf(buf, "pmem\n"); + default: + return sprintf(buf, "none\n"); + } +} + +static ssize_t mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc = 0; + + device_lock(dev); + nvdimm_bus_lock(dev); + if (dev->driver) + rc = -EBUSY; + else { + size_t n = len - 1; + + if (strncmp(buf, "pmem\n", n) == 0 + || strncmp(buf, "pmem", n) == 0) { + /* TODO: allocate from PMEM support */ + rc = -ENOTTY; + } else if (strncmp(buf, "ram\n", n) == 0 + || strncmp(buf, "ram", n) == 0) + nd_pfn->mode = PFN_MODE_RAM; + else if (strncmp(buf, "none\n", n) == 0 + || strncmp(buf, "none", n) == 0) + nd_pfn->mode = PFN_MODE_NONE; + else + rc = -EINVAL; + } + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(mode); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + if (nd_pfn->uuid) + return sprintf(buf, "%pUb\n", nd_pfn->uuid); + return sprintf(buf, "\n"); +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t namespace_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + rc = sprintf(buf, "%s\n", nd_pfn->ndns + ? dev_name(&nd_pfn->ndns->dev) : ""); + nvdimm_bus_unlock(dev); + return rc; +} + +static ssize_t namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + device_lock(dev); + rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(namespace); + +static struct attribute *nd_pfn_attributes[] = { + &dev_attr_mode.attr, + &dev_attr_namespace.attr, + &dev_attr_uuid.attr, + NULL, +}; + +static struct attribute_group nd_pfn_attribute_group = { + .attrs = nd_pfn_attributes, +}; + +static const struct attribute_group *nd_pfn_attribute_groups[] = { + &nd_pfn_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct device *__nd_pfn_create(struct nd_region *nd_region, + u8 *uuid, enum nd_pfn_mode mode, + struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn; + struct device *dev; + + /* we can only create pages for contiguous ranged of pmem */ + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); + if (!nd_pfn) + return NULL; + + nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); + if (nd_pfn->id < 0) { + kfree(nd_pfn); + return NULL; + } + + nd_pfn->mode = mode; + if (uuid) + uuid = kmemdup(uuid, 16, GFP_KERNEL); + nd_pfn->uuid = uuid; + dev = &nd_pfn->dev; + dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); + dev->parent = &nd_region->dev; + dev->type = &nd_pfn_device_type; + dev->groups = nd_pfn_attribute_groups; + device_initialize(&nd_pfn->dev); + if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); + return NULL; + } + return dev; +} + +struct device *nd_pfn_create(struct nd_region *nd_region) +{ + struct device *dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, + NULL); + + if (dev) + __nd_device_register(dev); + return dev; +} + +int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct nd_namespace_io *nsio; + u64 checksum, offset; + + if (!pfn_sb || !ndns) + return -ENODEV; + + if (!is_nd_pmem(nd_pfn->dev.parent)) + return -ENODEV; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN) + return -ENODEV; + + if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) + return -ENXIO; + + if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) + return -ENODEV; + + checksum = le64_to_cpu(pfn_sb->checksum); + pfn_sb->checksum = 0; + if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) + return -ENODEV; + pfn_sb->checksum = cpu_to_le64(checksum); + + switch (le32_to_cpu(pfn_sb->mode)) { + case PFN_MODE_RAM: + break; + case PFN_MODE_PMEM: + /* TODO: allocate from PMEM support */ + return -ENOTTY; + default: + return -ENXIO; + } + + if (!nd_pfn->uuid) { + /* from probe we allocate */ + nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); + if (!nd_pfn->uuid) + return -ENOMEM; + } else { + /* from init we validate */ + if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) + return -EINVAL; + } + + /* + * These warnings are verbose because they can only trigger in + * the case where the physical address alignment of the + * namespace has changed since the pfn superblock was + * established. + */ + offset = le64_to_cpu(pfn_sb->dataoff); + nsio = to_nd_namespace_io(&ndns->dev); + if (nsio->res.start & ND_PFN_MASK) { + dev_err(&nd_pfn->dev, + "init failed: %s not section aligned\n", + dev_name(&ndns->dev)); + return -EBUSY; + } else if (offset >= resource_size(&nsio->res)) { + dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", + dev_name(&ndns->dev)); + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL(nd_pfn_validate); + +int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + int rc; + struct device *dev; + struct nd_pfn *nd_pfn; + struct nd_pfn_sb *pfn_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dev) + return -ENOMEM; + dev_set_drvdata(dev, drvdata); + pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + nd_pfn = to_nd_pfn(dev); + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__, + rc == 0 ? dev_name(dev) : "<none>"); + if (rc < 0) { + __nd_detach_ndns(dev, &nd_pfn->ndns); + put_device(dev); + } else + __nd_device_register(&nd_pfn->dev); + + return rc; +} +EXPORT_SYMBOL(nd_pfn_probe); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4c079d5..b952538 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -21,18 +21,24 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/module.h> +#include <linux/memory_hotplug.h> #include <linux/moduleparam.h> +#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/pmem.h> #include <linux/nd.h> +#include "pfn.h" #include "nd.h" struct pmem_device { struct request_queue *pmem_queue; struct gendisk *pmem_disk; + struct nd_namespace_common *ndns; /* One contiguous memory region per device */ phys_addr_t phys_addr; + /* when non-zero this device is hosting a 'pfn' instance */ + phys_addr_t data_offset; void __pmem *virt_addr; size_t size; }; @@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, sector_t sector) { void *mem = kmap_atomic(page); - size_t pmem_off = sector << 9; + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { @@ -92,19 +98,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; - size_t offset = sector << 9; - - if (!pmem) - return -ENODEV; + resource_size_t offset = sector * 512 + pmem->data_offset; + resource_size_t size; + + if (pmem->data_offset) { + /* + * Limit the direct_access() size to what is covered by + * the memmap + */ + size = (pmem->size - offset) & ~ND_PFN_MASK; + } else + size = pmem->size - offset; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ - *kaddr = (void __force *) pmem->virt_addr + offset; + *kaddr = pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; - return pmem->size - offset; + return size; } static const struct block_device_operations pmem_fops = { @@ -119,27 +132,33 @@ static struct pmem_device *pmem_alloc(struct device *dev, { struct pmem_device *pmem; - pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); + pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) return ERR_PTR(-ENOMEM); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!arch_has_pmem_api()) + if (!arch_has_wmb_pmem()) dev_warn(dev, "unable to guarantee persistence of writes\n"); - if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, + dev_name(dev))) { dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - kfree(pmem); return ERR_PTR(-EBUSY); } - pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) { - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); - return ERR_PTR(-ENXIO); + if (pmem_should_map_pages(dev)) { + void *addr = devm_memremap_pages(dev, res); + + if (IS_ERR(addr)) + return addr; + pmem->virt_addr = (void __pmem *) addr; + } else { + pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, + pmem->size); + if (!pmem->virt_addr) + return ERR_PTR(-ENXIO); } return pmem; @@ -147,13 +166,16 @@ static struct pmem_device *pmem_alloc(struct device *dev, static void pmem_detach_disk(struct pmem_device *pmem) { + if (!pmem->pmem_disk) + return; + del_gendisk(pmem->pmem_disk); put_disk(pmem->pmem_disk); blk_cleanup_queue(pmem->pmem_queue); } -static int pmem_attach_disk(struct nd_namespace_common *ndns, - struct pmem_device *pmem) +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns, struct pmem_device *pmem) { struct gendisk *disk; @@ -162,6 +184,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); + blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); @@ -179,8 +202,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); - disk->driverfs_dev = &ndns->dev; - set_capacity(disk, pmem->size >> 9); + disk->driverfs_dev = dev; + set_capacity(disk, (pmem->size - pmem->data_offset) / 512); pmem->pmem_disk = disk; add_disk(disk); @@ -209,11 +232,152 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, return 0; } -static void pmem_free(struct pmem_device *pmem) +static int nd_pfn_init(struct nd_pfn *nd_pfn) +{ + struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_region *nd_region; + unsigned long npfns; + phys_addr_t offset; + u64 checksum; + int rc; + + if (!pfn_sb) + return -ENOMEM; + + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + if (rc == 0 || rc == -EBUSY) + return rc; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN + || pmem->phys_addr & ND_PFN_MASK) + return -ENODEV; + + nd_region = to_nd_region(nd_pfn->dev.parent); + if (nd_region->ro) { + dev_info(&nd_pfn->dev, + "%s is read-only, unable to init metadata\n", + dev_name(&nd_region->dev)); + goto err; + } + + memset(pfn_sb, 0, sizeof(*pfn_sb)); + npfns = (pmem->size - SZ_8K) / SZ_4K; + /* + * Note, we use 64 here for the standard size of struct page, + * debugging options may cause it to be larger in which case the + * implementation will limit the pfns advertised through + * ->direct_access() to those that are included in the memmap. + */ + if (nd_pfn->mode == PFN_MODE_PMEM) + offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE); + else if (nd_pfn->mode == PFN_MODE_RAM) + offset = SZ_8K; + else + goto err; + + npfns = (pmem->size - offset) / SZ_4K; + pfn_sb->mode = cpu_to_le32(nd_pfn->mode); + pfn_sb->dataoff = cpu_to_le64(offset); + pfn_sb->npfns = cpu_to_le64(npfns); + memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); + memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); + pfn_sb->version_major = cpu_to_le16(1); + checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); + pfn_sb->checksum = cpu_to_le64(checksum); + + rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)); + if (rc) + goto err; + + return 0; + err: + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + return -ENXIO; +} + +static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct pmem_device *pmem; + + /* free pmem disk */ + pmem = dev_get_drvdata(&nd_pfn->dev); + pmem_detach_disk(pmem); + + /* release nd_pfn resources */ + kfree(nd_pfn->pfn_sb); + nd_pfn->pfn_sb = NULL; + + return 0; +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) { - memunmap_pmem(pmem->virt_addr); - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct device *dev = &nd_pfn->dev; + struct vmem_altmap *altmap; + struct nd_region *nd_region; + struct nd_pfn_sb *pfn_sb; + struct pmem_device *pmem; + phys_addr_t offset; + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + nd_region = to_nd_region(dev->parent); + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + + if (PAGE_SIZE != SZ_4K) { + dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n"); + return -ENXIO; + } + if (nsio->res.start & ND_PFN_MASK) { + dev_err(dev, "%s not memory hotplug section aligned\n", + dev_name(&ndns->dev)); + return -ENXIO; + } + + pfn_sb = nd_pfn->pfn_sb; + offset = le64_to_cpu(pfn_sb->dataoff); + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); + if (nd_pfn->mode == PFN_MODE_RAM) { + if (offset != SZ_8K) + return -EINVAL; + nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); + altmap = NULL; + } else { + rc = -ENXIO; + goto err; + } + + /* establish pfn range for lookup, and switch to direct map */ + pmem = dev_get_drvdata(dev); + memunmap_pmem(dev, pmem->virt_addr); + pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res); + if (IS_ERR(pmem->virt_addr)) { + rc = PTR_ERR(pmem->virt_addr); + goto err; + } + + /* attach pmem disk in "pfn-mode" */ + pmem->data_offset = offset; + rc = pmem_attach_disk(dev, ndns, pmem); + if (rc) + goto err; + + return rc; + err: + nvdimm_namespace_detach_pfn(ndns); + return rc; } static int nd_pmem_probe(struct device *dev) @@ -222,7 +386,6 @@ static int nd_pmem_probe(struct device *dev) struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct pmem_device *pmem; - int rc; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) @@ -233,18 +396,27 @@ static int nd_pmem_probe(struct device *dev) if (IS_ERR(pmem)) return PTR_ERR(pmem); + pmem->ndns = ndns; dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) - rc = nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(ndns, pmem) == 0) { + return nvdimm_namespace_attach_btt(ndns); + + if (is_nd_pfn(dev)) + return nvdimm_namespace_attach_pfn(ndns); + + if (nd_btt_probe(ndns, pmem) == 0) { /* we'll come back as btt-pmem */ - rc = -ENXIO; - } else - rc = pmem_attach_disk(ndns, pmem); - if (rc) - pmem_free(pmem); - return rc; + return -ENXIO; + } + + if (nd_pfn_probe(ndns, pmem) == 0) { + /* we'll come back as pfn-pmem */ + return -ENXIO; + } + + return pmem_attach_disk(dev, ndns, pmem); } static int nd_pmem_remove(struct device *dev) @@ -252,10 +424,11 @@ static int nd_pmem_remove(struct device *dev) struct pmem_device *pmem = dev_get_drvdata(dev); if (is_nd_btt(dev)) - nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + nvdimm_namespace_detach_btt(pmem->ndns); + else if (is_nd_pfn(dev)) + nvdimm_namespace_detach_pfn(pmem->ndns); else pmem_detach_disk(pmem); - pmem_free(pmem); return 0; } diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index f28f78c..7da63ea 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -53,6 +53,7 @@ static int nd_region_probe(struct device *dev) return -ENODEV; nd_region->btt_seed = nd_btt_create(nd_region); + nd_region->pfn_seed = nd_pfn_create(nd_region); if (err == 0) return 0; @@ -84,6 +85,7 @@ static int nd_region_remove(struct device *dev) nvdimm_bus_lock(dev); nd_region->ns_seed = NULL; nd_region->btt_seed = NULL; + nd_region->pfn_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 7384455..529f3f0 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -345,6 +345,23 @@ static ssize_t btt_seed_show(struct device *dev, } static DEVICE_ATTR_RO(btt_seed); +static ssize_t pfn_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->pfn_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(pfn_seed); + static ssize_t read_only_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -373,6 +390,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_nstype.attr, &dev_attr_mappings.attr, &dev_attr_btt_seed.attr, + &dev_attr_pfn_seed.attr, &dev_attr_read_only.attr, &dev_attr_set_cookie.attr, &dev_attr_available_size.attr, @@ -740,10 +758,12 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->provider_data = ndr_desc->provider_data; nd_region->nd_set = ndr_desc->nd_set; nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->flags = ndr_desc->flags; nd_region->ro = ro; nd_region->numa_node = ndr_desc->numa_node; ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); + ida_init(&nd_region->pfn_ida); dev = &nd_region->dev; dev_set_name(dev, "region%d", nd_region->id); dev->parent = &nvdimm_bus->dev; |