author    Linus Torvalds <torvalds@linux-foundation.org>  2017-09-11 13:10:57 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-09-11 13:10:57 -0700
commit    89fd915c402113528750353ad6de9ea68a787e5c (patch)
tree      03ba8e8e6400e43ef518393259941eae39ee32d8 /drivers/nvdimm
parent    66c9457df3926efff65529dab1a8c742df756790 (diff)
parent    04c3c982fcc0151ed3574d7ae4f1e62278054d72 (diff)
Merge tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm from Dan Williams:
 "A rework of media error handling in the BTT driver and other updates.
  It has appeared in a few -next releases and collected some
  late-breaking build-error and warning fixups as a result.

  Summary:

   - Media error handling support in the Block Translation Table (BTT)
     driver is reworked to address sleeping-while-atomic locking and
     memory-allocation-context conflicts.

   - The dax_device lookup overhead for xfs and ext4 is moved out of the
     iomap hot-path to a mount-time lookup.

   - A new 'ecc_unit_size' sysfs attribute is added to advertise the
     read-modify-write boundary property of a persistent memory range.

   - Preparatory fix-ups for arm and powerpc pmem support are included
     along with other miscellaneous fixes"

* tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (26 commits)
  libnvdimm, btt: fix format string warnings
  libnvdimm, btt: clean up warning and error messages
  ext4: fix null pointer dereference on sbi
  libnvdimm, nfit: move the check on nd_reserved2 to the endpoint
  dax: fix FS_DAX=n BLOCK=y compilation
  libnvdimm: fix integer overflow static analysis warning
  libnvdimm, nd_blk: remove mmio_flush_range()
  libnvdimm, btt: rework error clearing
  libnvdimm: fix potential deadlock while clearing errors
  libnvdimm, btt: cache sector_size in arena_info
  libnvdimm, btt: ensure that flags were also unchanged during a map_read
  libnvdimm, btt: refactor map entry operations with macros
  libnvdimm, btt: fix a missed NVDIMM_IO_ATOMIC case in the write path
  libnvdimm, nfit: export an 'ecc_unit_size' sysfs attribute
  ext4: perform dax_device lookup at mount
  ext2: perform dax_device lookup at mount
  xfs: perform dax_device lookup at mount
  dax: introduce a fs_dax_get_by_bdev() helper
  libnvdimm, btt: check memory allocation failure
  libnvdimm, label: fix index block size calculation
  ...
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r--  drivers/nvdimm/btt.c             | 197
-rw-r--r--  drivers/nvdimm/btt.h             |  11
-rw-r--r--  drivers/nvdimm/btt_devs.c        |   4
-rw-r--r--  drivers/nvdimm/bus.c             |  27
-rw-r--r--  drivers/nvdimm/claim.c           |   9
-rw-r--r--  drivers/nvdimm/core.c            |  10
-rw-r--r--  drivers/nvdimm/label.c           |  30
-rw-r--r--  drivers/nvdimm/namespace_devs.c  |   6
-rw-r--r--  drivers/nvdimm/nd.h              |  16
-rw-r--r--  drivers/nvdimm/pfn_devs.c        |  53
-rw-r--r--  drivers/nvdimm/pmem.h            |  14
-rw-r--r--  drivers/nvdimm/region_devs.c     |   6
12 files changed, 259 insertions, 124 deletions
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 6049164..d5612bd 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -31,6 +31,16 @@ enum log_ent_request {
LOG_OLD_ENT
};
+static struct device *to_dev(struct arena_info *arena)
+{
+ return &arena->nd_btt->dev;
+}
+
+static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
+{
+ return offset + nd_btt->initial_offset;
+}
+
static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
void *buf, size_t n, unsigned long flags)
{
@@ -38,7 +48,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */
- offset += arena->nd_btt->initial_offset;
+ offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_read_bytes(ndns, offset, buf, n, flags);
}
@@ -49,7 +59,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */
- offset += arena->nd_btt->initial_offset;
+ offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_write_bytes(ndns, offset, buf, n, flags);
}
@@ -62,8 +72,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
* We rely on that to make sure rw_bytes does error clearing
* correctly, so make sure that is the case.
*/
- WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
- WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512),
+ "arena->infooff: %#llx is unaligned\n", arena->infooff);
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
+ "arena->info2off: %#llx is unaligned\n", arena->info2off);
ret = arena_write_bytes(arena, arena->info2off, super,
sizeof(struct btt_sb), 0);
@@ -76,7 +88,6 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
{
- WARN_ON(!super);
return arena_read_bytes(arena, arena->infooff, super,
sizeof(struct btt_sb), 0);
}
@@ -92,7 +103,10 @@ static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
{
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
- WARN_ON(lba >= arena->external_nlba);
+ if (unlikely(lba >= arena->external_nlba))
+ dev_err_ratelimited(to_dev(arena),
+ "%s: lba %#x out of range (max: %#x)\n",
+ __func__, lba, arena->external_nlba);
return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
}
@@ -106,7 +120,7 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* This 'mapping' is supposed to be just the LBA mapping, without
* any flags set, so strip the flag bits.
*/
- mapping &= MAP_LBA_MASK;
+ mapping = ent_lba(mapping);
ze = (z_flag << 1) + e_flag;
switch (ze) {
@@ -131,7 +145,8 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* construed as a valid 'normal' case, but we decide not to,
* to avoid confusion
*/
- WARN_ONCE(1, "Invalid use of Z and E flags\n");
+ dev_err_ratelimited(to_dev(arena),
+ "Invalid use of Z and E flags\n");
return -EIO;
}
@@ -147,7 +162,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
u32 raw_mapping, postmap, ze, z_flag, e_flag;
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
- WARN_ON(lba >= arena->external_nlba);
+ if (unlikely(lba >= arena->external_nlba))
+ dev_err_ratelimited(to_dev(arena),
+ "%s: lba %#x out of range (max: %#x)\n",
+ __func__, lba, arena->external_nlba);
ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
if (ret)
@@ -155,10 +173,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
raw_mapping = le32_to_cpu(in);
- z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
- e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
+ z_flag = ent_z_flag(raw_mapping);
+ e_flag = ent_e_flag(raw_mapping);
ze = (z_flag << 1) + e_flag;
- postmap = raw_mapping & MAP_LBA_MASK;
+ postmap = ent_lba(raw_mapping);
/* Reuse the {z,e}_flag variables for *trim and *error */
z_flag = 0;
@@ -195,7 +213,6 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
static int btt_log_read_pair(struct arena_info *arena, u32 lane,
struct log_entry *ent)
{
- WARN_ON(!ent);
return arena_read_bytes(arena,
arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
2 * LOG_ENT_SIZE, 0);
@@ -299,11 +316,6 @@ static int btt_log_get_old(struct log_entry *ent)
return old;
}
-static struct device *to_dev(struct arena_info *arena)
-{
- return &arena->nd_btt->dev;
-}
-
/*
* This function copies the desired (old/new) log entry into ent if
* it is not NULL. It returns the sub-slot number (0 or 1)
@@ -381,7 +393,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
if (++(arena->freelist[lane].seq) == 4)
arena->freelist[lane].seq = 1;
- arena->freelist[lane].block = le32_to_cpu(ent->old_map);
+ if (ent_e_flag(ent->old_map))
+ arena->freelist[lane].has_err = 1;
+ arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
return ret;
}
@@ -407,12 +421,14 @@ static int btt_map_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that
* is the case.
*/
- WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512),
+ "arena->mapoff: %#llx is unaligned\n", arena->mapoff);
while (mapsize) {
size_t size = min(mapsize, chunk_size);
- WARN_ON_ONCE(size < 512);
+ dev_WARN_ONCE(to_dev(arena), size < 512,
+ "chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
size, 0);
if (ret)
@@ -449,12 +465,14 @@ static int btt_log_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that
* is the case.
*/
- WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512),
+ "arena->logoff: %#llx is unaligned\n", arena->logoff);
while (logsize) {
size_t size = min(logsize, chunk_size);
- WARN_ON_ONCE(size < 512);
+ dev_WARN_ONCE(to_dev(arena), size < 512,
+ "chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
size, 0);
if (ret)
@@ -480,6 +498,40 @@ static int btt_log_init(struct arena_info *arena)
return ret;
}
+static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
+{
+ return arena->dataoff + ((u64)lba * arena->internal_lbasize);
+}
+
+static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
+{
+ int ret = 0;
+
+ if (arena->freelist[lane].has_err) {
+ void *zero_page = page_address(ZERO_PAGE(0));
+ u32 lba = arena->freelist[lane].block;
+ u64 nsoff = to_namespace_offset(arena, lba);
+ unsigned long len = arena->sector_size;
+
+ mutex_lock(&arena->err_lock);
+
+ while (len) {
+ unsigned long chunk = min(len, PAGE_SIZE);
+
+ ret = arena_write_bytes(arena, nsoff, zero_page,
+ chunk, 0);
+ if (ret)
+ break;
+ len -= chunk;
+ nsoff += chunk;
+ if (len == 0)
+ arena->freelist[lane].has_err = 0;
+ }
+ mutex_unlock(&arena->err_lock);
+ }
+ return ret;
+}
+
static int btt_freelist_init(struct arena_info *arena)
{
int old, new, ret;
@@ -505,6 +557,17 @@ static int btt_freelist_init(struct arena_info *arena)
arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
arena->freelist[i].block = le32_to_cpu(log_new.old_map);
+ /*
+ * FIXME: if error clearing fails during init, we want to make
+ * the BTT read-only
+ */
+ if (ent_e_flag(log_new.old_map)) {
+ ret = arena_clear_freelist_error(arena, i);
+ if (ret)
+ dev_err_ratelimited(to_dev(arena),
+ "Unable to clear known errors\n");
+ }
+
/* This implies a newly created or untouched flog entry */
if (log_new.old_map == log_new.new_map)
continue;
@@ -525,7 +588,6 @@ static int btt_freelist_init(struct arena_info *arena)
if (ret)
return ret;
}
-
}
return 0;
@@ -566,6 +628,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
if (!arena)
return NULL;
arena->nd_btt = btt->nd_btt;
+ arena->sector_size = btt->sector_size;
if (!size)
return arena;
@@ -694,6 +757,7 @@ static int discover_arenas(struct btt *btt)
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
+ mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena);
if (ret)
goto out;
@@ -904,11 +968,6 @@ static void unlock_map(struct arena_info *arena, u32 premap)
spin_unlock(&arena->map_locks[idx].lock);
}
-static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
-{
- return arena->dataoff + ((u64)lba * arena->internal_lbasize);
-}
-
static int btt_data_read(struct arena_info *arena, struct page *page,
unsigned int off, u32 lba, u32 len)
{
@@ -1032,6 +1091,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/
while (1) {
u32 new_map;
+ int new_t, new_e;
if (t_flag) {
zero_fill_data(page, off, cur_len);
@@ -1050,20 +1110,29 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/
barrier();
- ret = btt_map_read(arena, premap, &new_map, &t_flag,
- &e_flag, NVDIMM_IO_ATOMIC);
+ ret = btt_map_read(arena, premap, &new_map, &new_t,
+ &new_e, NVDIMM_IO_ATOMIC);
if (ret)
goto out_rtt;
- if (postmap == new_map)
+ if ((postmap == new_map) && (t_flag == new_t) &&
+ (e_flag == new_e))
break;
postmap = new_map;
+ t_flag = new_t;
+ e_flag = new_e;
}
ret = btt_data_read(arena, page, off, postmap, cur_len);
- if (ret)
+ if (ret) {
+ int rc;
+
+ /* Media error - set the e_flag */
+ rc = btt_map_write(arena, premap, postmap, 0, 1,
+ NVDIMM_IO_ATOMIC);
goto out_rtt;
+ }
if (bip) {
ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
@@ -1088,6 +1157,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
return ret;
}
+/*
+ * Normally, arena_{read,write}_bytes will take care of the initial offset
+ * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem,
+ * we need the final, raw namespace offset here
+ */
+static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
+ u32 postmap)
+{
+ u64 nsoff = adjust_initial_offset(arena->nd_btt,
+ to_namespace_offset(arena, postmap));
+ sector_t phys_sector = nsoff >> 9;
+
+ return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize);
+}
+
static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
sector_t sector, struct page *page, unsigned int off,
unsigned int len)
@@ -1100,7 +1184,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
while (len) {
u32 cur_len;
+ int e_flag;
+ retry:
lane = nd_region_acquire_lane(btt->nd_region);
ret = lba_to_arena(btt, sector, &premap, &arena);
@@ -1113,6 +1199,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
goto out_lane;
}
+ if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
+ arena->freelist[lane].has_err = 1;
+
+ if (mutex_is_locked(&arena->err_lock)
+ || arena->freelist[lane].has_err) {
+ nd_region_release_lane(btt->nd_region, lane);
+
+ ret = arena_clear_freelist_error(arena, lane);
+ if (ret)
+ return ret;
+
+ /* OK to acquire a different lane/free block */
+ goto retry;
+ }
+
new_postmap = arena->freelist[lane].block;
/* Wait if the new block is being read from */
@@ -1138,7 +1239,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
}
lock_map(arena, premap);
- ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+ ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
NVDIMM_IO_ATOMIC);
if (ret)
goto out_map;
@@ -1146,6 +1247,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
ret = -EIO;
goto out_map;
}
+ if (e_flag)
+ set_e_flag(old_postmap);
log.lba = cpu_to_le32(premap);
log.old_map = cpu_to_le32(old_postmap);
@@ -1156,13 +1259,20 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
if (ret)
goto out_map;
- ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
+ ret = btt_map_write(arena, premap, new_postmap, 0, 0,
+ NVDIMM_IO_ATOMIC);
if (ret)
goto out_map;
unlock_map(arena, premap);
nd_region_release_lane(btt->nd_region, lane);
+ if (e_flag) {
+ ret = arena_clear_freelist_error(arena, lane);
+ if (ret)
+ return ret;
+ }
+
len -= cur_len;
off += cur_len;
sector += btt->sector_size >> SECTOR_SHIFT;
@@ -1211,11 +1321,13 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
- BUG_ON(len > PAGE_SIZE);
- /* Make sure len is in multiples of sector size. */
- /* XXX is this right? */
- BUG_ON(len < btt->sector_size);
- BUG_ON(len % btt->sector_size);
+ if (len > PAGE_SIZE || len < btt->sector_size ||
+ len % btt->sector_size) {
+ dev_err_ratelimited(&btt->nd_btt->dev,
+ "unaligned bio segment (len: %d)\n", len);
+ bio->bi_status = BLK_STS_IOERR;
+ break;
+ }
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector);
@@ -1345,6 +1457,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
{
int ret;
struct btt *btt;
+ struct nd_namespace_io *nsio;
struct device *dev = &nd_btt->dev;
btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
@@ -1358,6 +1471,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
INIT_LIST_HEAD(&btt->arena_list);
mutex_init(&btt->init_lock);
btt->nd_region = nd_region;
+ nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
+ btt->phys_bb = &nsio->bb;
ret = discover_arenas(btt);
if (ret) {
@@ -1431,6 +1546,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
}
btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
+ if (!btt_sb)
+ return -ENOMEM;
/*
* If this returns < 0, that is ok as it just means there wasn't
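
[Annotation] The btt.c hunks above combine into one control flow on the write side: a
lane's free block is checked against the namespace badblocks list, and any
known-poisoned block is scrubbed outside the lane lock before the write retries.
A condensed sketch, paraphrased from the hunks above rather than literal
upstream code:

	retry:
	lane = nd_region_acquire_lane(btt->nd_region);
	if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
		arena->freelist[lane].has_err = 1;
	if (mutex_is_locked(&arena->err_lock) || arena->freelist[lane].has_err) {
		nd_region_release_lane(btt->nd_region, lane);	/* drop the lane first */
		ret = arena_clear_freelist_error(arena, lane);	/* may sleep, takes err_lock */
		if (ret)
			return ret;
		goto retry;	/* OK to acquire a different lane/free block */
	}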
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 888e862..578c205 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -15,6 +15,7 @@
#ifndef _LINUX_BTT_H
#define _LINUX_BTT_H
+#include <linux/badblocks.h>
#include <linux/types.h>
#define BTT_SIG_LEN 16
@@ -38,6 +39,11 @@
#define IB_FLAG_ERROR 0x00000001
#define IB_FLAG_ERROR_MASK 0x00000001
+#define ent_lba(ent) (ent & MAP_LBA_MASK)
+#define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK))
+#define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK))
+#define set_e_flag(ent) (ent |= MAP_ERR_MASK)
+
enum btt_init_state {
INIT_UNCHECKED = 0,
INIT_NOTFOUND,
@@ -78,6 +84,7 @@ struct free_entry {
u32 block;
u8 sub;
u8 seq;
+ u8 has_err;
};
struct aligned_lock {
@@ -104,6 +111,7 @@ struct aligned_lock {
* handle incoming writes.
* @version_major: Metadata layout version major.
* @version_minor: Metadata layout version minor.
+ * @sector_size: The Linux sector size - 512 or 4096
* @nextoff: Offset in bytes to the start of the next arena.
* @infooff: Offset in bytes to the info block of this arena.
* @dataoff: Offset in bytes to the data area of this arena.
@@ -131,6 +139,7 @@ struct arena_info {
u32 nfree;
u16 version_major;
u16 version_minor;
+ u32 sector_size;
/* Byte offsets to the different on-media structures */
u64 nextoff;
u64 infooff;
@@ -147,6 +156,7 @@ struct arena_info {
struct dentry *debugfs_dir;
/* Arena flags */
u32 flags;
+ struct mutex err_lock;
};
/**
@@ -181,6 +191,7 @@ struct btt {
struct mutex init_lock;
int init_state;
int num_arenas;
+ struct badblocks *phys_bb;
};
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
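
[Annotation] The new ent_* helpers treat a 32-bit map entry as two flag bits plus a
post-map LBA. A minimal decode example, assuming the existing btt.h layout
(trim/zero flag in bit 31 via MAP_TRIM_MASK, error flag in bit 30 via
MAP_ERR_MASK, LBA in the low 30 bits via MAP_LBA_MASK); the value is invented
for illustration:

	u32 raw = 0x40000007;		/* hypothetical entry: error flag set, LBA 7 */
	u32 lba = ent_lba(raw);		/* 0x7 */
	int e   = ent_e_flag(raw);	/* 1 - a media error is recorded */
	int z   = ent_z_flag(raw);	/* 0 - not a zeroed/trimmed block */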
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 3e359d2..d589252 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -61,7 +61,7 @@ static ssize_t sector_size_show(struct device *dev,
{
struct nd_btt *nd_btt = to_nd_btt(dev);
- return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+ return nd_size_select_show(nd_btt->lbasize, btt_lbasize_supported, buf);
}
static ssize_t sector_size_store(struct device *dev,
@@ -72,7 +72,7 @@ static ssize_t sector_size_store(struct device *dev,
device_lock(dev);
nvdimm_bus_lock(dev);
- rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+ rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported);
dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
rc, buf, buf[len - 1] == '\n' ? "" : "\n");
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 937fafa..baf2839 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,6 +11,7 @@
* General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/sched/mm.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/module.h>
@@ -234,6 +235,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
struct nd_cmd_clear_error clear_err;
struct nd_cmd_ars_cap ars_cap;
u32 clear_err_unit, mask;
+ unsigned int noio_flag;
int cmd_rc, rc;
if (!nvdimm_bus)
@@ -250,8 +252,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
memset(&ars_cap, 0, sizeof(ars_cap));
ars_cap.address = phys;
ars_cap.length = len;
+ noio_flag = memalloc_noio_save();
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
sizeof(ars_cap), &cmd_rc);
+ memalloc_noio_restore(noio_flag);
if (rc < 0)
return rc;
if (cmd_rc < 0)
@@ -266,8 +270,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
memset(&clear_err, 0, sizeof(clear_err));
clear_err.address = phys;
clear_err.length = len;
+ noio_flag = memalloc_noio_save();
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
sizeof(clear_err), &cmd_rc);
+ memalloc_noio_restore(noio_flag);
if (rc < 0)
return rc;
if (cmd_rc < 0)
@@ -905,19 +911,20 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
- size_t buf_len = 0, in_len = 0, out_len = 0;
static char out_env[ND_CMD_MAX_ENVELOPE];
static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
- unsigned int func = cmd;
- void __user *p = (void __user *) arg;
struct device *dev = &nvdimm_bus->dev;
- struct nd_cmd_pkg pkg;
+ void __user *p = (void __user *) arg;
const char *cmd_name, *dimm_name;
+ u32 in_len = 0, out_len = 0;
+ unsigned int func = cmd;
unsigned long cmd_mask;
- void *buf;
+ struct nd_cmd_pkg pkg;
int rc, i, cmd_rc;
+ u64 buf_len = 0;
+ void *buf;
if (nvdimm) {
desc = nd_cmd_dimm_desc(cmd);
@@ -977,13 +984,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (cmd == ND_CMD_CALL) {
func = pkg.nd_command;
- dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n",
+ dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
__func__, dimm_name, pkg.nd_command,
in_len, out_len, buf_len);
-
- for (i = 0; i < ARRAY_SIZE(pkg.nd_reserved2); i++)
- if (pkg.nd_reserved2[i])
- return -EINVAL;
}
/* process an output envelope */
@@ -1007,9 +1010,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
out_len += out_size;
}
- buf_len = out_len + in_len;
+ buf_len = (u64) out_len + (u64) in_len;
if (buf_len > ND_IOCTL_MAX_BUFLEN) {
- dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__,
+ dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
dimm_name, cmd_name, buf_len,
ND_IOCTL_MAX_BUFLEN);
return -EINVAL;
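
[Annotation] nvdimm_clear_poison() can now be reached while the BTT is scrubbing poisoned
blocks, so the firmware calls are wrapped in a NOIO scope. A minimal sketch of
the memalloc_noio pattern used above; do_firmware_call() is a hypothetical
stand-in for the nd_desc->ndctl() invocation, not a libnvdimm API:

	#include <linux/sched/mm.h>

	static int do_firmware_call(void);	/* hypothetical placeholder */

	static int call_firmware_noio(void)
	{
		unsigned int noio_flag;
		int rc;

		noio_flag = memalloc_noio_save();	/* allocations below degrade to GFP_NOIO */
		rc = do_firmware_call();		/* may allocate, but cannot recurse into I/O */
		memalloc_noio_restore(noio_flag);	/* restore the previous allocation scope */
		return rc;
	}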
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 4777046..b2fc29b 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -280,18 +280,11 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
- /*
- * FIXME: nsio_rw_bytes() may be called from atomic
- * context in the btt case and the ACPI DSM path for
- * clearing the error takes sleeping locks and allocates
- * memory. An explicit error clearing path, and support
- * for tracking badblocks in BTT metadata is needed to
- * work around this collision.
- */
if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
&& !(flags & NVDIMM_IO_ATOMIC)) {
long cleared;
+ might_sleep();
cleared = nvdimm_clear_poison(&ndns->dev,
nsio->res.start + offset, size);
if (cleared < size)
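
[Annotation] The NVDIMM_IO_ATOMIC flag is what gates the sleeping clear path above. A
sketch of how a caller selects the behavior, using the nvdimm_write_bytes()
signature as it appears in the arena_{read,write}_bytes hunks earlier in this
diff:

	/* process context: inline poison clearing is allowed (may sleep) */
	nvdimm_write_bytes(ndns, offset, buf, len, 0);

	/* BTT I/O path, possibly atomic: clearing is deferred to
	 * arena_clear_freelist_error() */
	nvdimm_write_bytes(ndns, offset, buf, len, NVDIMM_IO_ATOMIC);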
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 75bc08c..bb71f0c 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -277,14 +277,14 @@ int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
return 0;
}
-ssize_t nd_sector_size_show(unsigned long current_lbasize,
+ssize_t nd_size_select_show(unsigned long current_size,
const unsigned long *supported, char *buf)
{
ssize_t len = 0;
int i;
for (i = 0; supported[i]; i++)
- if (current_lbasize == supported[i])
+ if (current_size == supported[i])
len += sprintf(buf + len, "[%ld] ", supported[i]);
else
len += sprintf(buf + len, "%ld ", supported[i]);
@@ -292,8 +292,8 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize,
return len;
}
-ssize_t nd_sector_size_store(struct device *dev, const char *buf,
- unsigned long *current_lbasize, const unsigned long *supported)
+ssize_t nd_size_select_store(struct device *dev, const char *buf,
+ unsigned long *current_size, const unsigned long *supported)
{
unsigned long lbasize;
int rc, i;
@@ -310,7 +310,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf,
break;
if (supported[i]) {
- *current_lbasize = lbasize;
+ *current_size = lbasize;
return 0;
} else {
return -EINVAL;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 87796f8..9c5f108 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -45,12 +45,14 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
return ndd->nslabel_size;
}
-size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
+int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
{
- u32 index_span;
+ return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+}
- if (ndd->nsindex_size)
- return ndd->nsindex_size;
+size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
+{
+ u32 nslot, space, size;
/*
* The minimum index space is 512 bytes, with that amount of
@@ -60,16 +62,16 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
* starts to waste space at larger config_sizes, but it's
* unlikely we'll ever see anything but 128K.
*/
- index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
- index_span /= NSINDEX_ALIGN * 2;
- ndd->nsindex_size = index_span * NSINDEX_ALIGN;
-
- return ndd->nsindex_size;
-}
-
-int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
-{
- return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+ nslot = nvdimm_num_label_slots(ndd);
+ space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
+ size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
+ NSINDEX_ALIGN) * 2;
+ if (size <= space)
+ return size / 2;
+
+ dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
+ ndd->nsarea.config_size, sizeof_namespace_label(ndd));
+ return 0;
}
static int __nd_label_validate(struct nvdimm_drvdata *ndd)
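
[Annotation] The rewritten sizeof_namespace_index() derives each index block's size from
the slot count instead of caching a precomputed value. A worked example with
assumed but typical numbers (config_size = 131072, 128-byte labels,
NSINDEX_ALIGN = 256):

	nslot = 131072 / (128 + 1)        = 1016 label slots
	space = 131072 - 1016 * 128       = 1024 bytes left for both index blocks
	size  = ALIGN(hdr + DIV_ROUND_UP(1016, 8), 256) * 2
	      = ALIGN(hdr + 127, 256) * 2 = 512, assuming the index header fits
	                                    within one 256-byte alignment unit
	return size / 2                   = 256 bytes per index block (512 <= 1024)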
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 5f1c675..1427a38 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1313,14 +1313,14 @@ static ssize_t sector_size_show(struct device *dev,
if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
- return nd_sector_size_show(nsblk->lbasize,
+ return nd_size_select_show(nsblk->lbasize,
blk_lbasize_supported, buf);
}
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
- return nd_sector_size_show(nspm->lbasize,
+ return nd_size_select_show(nspm->lbasize,
pmem_lbasize_supported, buf);
}
return -ENXIO;
@@ -1352,7 +1352,7 @@ static ssize_t sector_size_store(struct device *dev,
if (to_ndns(dev)->claim)
rc = -EBUSY;
if (rc >= 0)
- rc = nd_sector_size_store(dev, buf, lbasize, supported);
+ rc = nd_size_select_store(dev, buf, lbasize, supported);
if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index a87f793..9c758a9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -42,7 +42,7 @@ struct nd_poison {
struct nvdimm_drvdata {
struct device *dev;
- int nsindex_size, nslabel_size;
+ int nslabel_size;
struct nd_cmd_get_config_size nsarea;
void *data;
int ns_current, ns_next;
@@ -134,6 +134,7 @@ struct nd_mapping {
struct nvdimm *nvdimm;
u64 start;
u64 size;
+ int position;
struct list_head labels;
struct mutex lock;
/*
@@ -233,10 +234,10 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
void nd_device_notify(struct device *dev, enum nvdimm_event event);
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
size_t len);
-ssize_t nd_sector_size_show(unsigned long current_lbasize,
+ssize_t nd_size_select_show(unsigned long current_size,
const unsigned long *supported, char *buf);
-ssize_t nd_sector_size_store(struct device *dev, const char *buf,
- unsigned long *current_lbasize, const unsigned long *supported);
+ssize_t nd_size_select_store(struct device *dev, const char *buf,
+ unsigned long *current_size, const unsigned long *supported);
int __init nvdimm_init(void);
int __init nd_region_init(void);
int __init nd_label_init(void);
@@ -285,6 +286,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
struct nd_pfn *to_nd_pfn(struct device *dev);
#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
+#else
+#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
+#endif
+
int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
bool is_nd_pfn(struct device *dev);
struct device *nd_pfn_create(struct nd_region *nd_region);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 5fcb6f5..9576c44 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -111,24 +111,27 @@ static ssize_t align_show(struct device *dev,
return sprintf(buf, "%ld\n", nd_pfn->align);
}
-static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf)
+static const unsigned long *nd_pfn_supported_alignments(void)
{
- unsigned long val;
- int rc;
-
- rc = kstrtoul(buf, 0, &val);
- if (rc)
- return rc;
-
- if (!is_power_of_2(val) || val < PAGE_SIZE || val > SZ_1G)
- return -EINVAL;
+ /*
+ * This needs to be a non-static variable because the *_SIZE
+ * macros aren't always constants.
+ */
+ const unsigned long supported_alignments[] = {
+ PAGE_SIZE,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ HPAGE_PMD_SIZE,
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ HPAGE_PUD_SIZE,
+#endif
+#endif
+ 0,
+ };
+ static unsigned long data[ARRAY_SIZE(supported_alignments)];
- if (nd_pfn->dev.driver)
- return -EBUSY;
- else
- nd_pfn->align = val;
+ memcpy(data, supported_alignments, sizeof(data));
- return 0;
+ return data;
}
static ssize_t align_store(struct device *dev,
@@ -139,7 +142,8 @@ static ssize_t align_store(struct device *dev,
device_lock(dev);
nvdimm_bus_lock(dev);
- rc = __align_store(nd_pfn, buf);
+ rc = nd_size_select_store(dev, buf, &nd_pfn->align,
+ nd_pfn_supported_alignments());
dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
rc, buf, buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
@@ -260,6 +264,13 @@ static ssize_t size_show(struct device *dev,
}
static DEVICE_ATTR_RO(size);
+static ssize_t supported_alignments_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return nd_size_select_show(0, nd_pfn_supported_alignments(), buf);
+}
+static DEVICE_ATTR_RO(supported_alignments);
+
static struct attribute *nd_pfn_attributes[] = {
&dev_attr_mode.attr,
&dev_attr_namespace.attr,
@@ -267,6 +278,7 @@ static struct attribute *nd_pfn_attributes[] = {
&dev_attr_align.attr,
&dev_attr_resource.attr,
&dev_attr_size.attr,
+ &dev_attr_supported_alignments.attr,
NULL,
};
@@ -290,7 +302,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
return NULL;
nd_pfn->mode = PFN_MODE_NONE;
- nd_pfn->align = HPAGE_SIZE;
+ nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
dev = &nd_pfn->dev;
device_initialize(&nd_pfn->dev);
if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
@@ -638,11 +650,12 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
/ PAGE_SIZE);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
- * vmemmap_populate_hugepages() allocates the memmap array in
- * HPAGE_SIZE chunks.
+ * The altmap should be padded out to the block size used
+ * when populating the vmemmap. This *should* be equal to
+ * PMD_SIZE for most architectures.
*/
offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
- max(nd_pfn->align, HPAGE_SIZE)) - start;
+ max(nd_pfn->align, PMD_SIZE)) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
offset = ALIGN(start + SZ_8K + dax_label_reserve,
nd_pfn->align) - start;
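
[Annotation] With the new attribute, the supported alignments become discoverable from
sysfs. On an x86-64 kernel with transparent hugepage and PUD support, the
output would plausibly be (device name hypothetical):

	# cat /sys/bus/nd/devices/pfn0.1/supported_alignments
	4096 2097152 1073741824

i.e. PAGE_SIZE, HPAGE_PMD_SIZE, and HPAGE_PUD_SIZE. No value is bracketed
because supported_alignments_show() passes 0 as the current size to
nd_size_select_show().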
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 5434321..c5917f0 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -5,20 +5,6 @@
#include <linux/pfn_t.h>
#include <linux/fs.h>
-#ifdef CONFIG_ARCH_HAS_PMEM_API
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
-void arch_wb_cache_pmem(void *addr, size_t size);
-void arch_invalidate_pmem(void *addr, size_t size);
-#else
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
-static inline void arch_wb_cache_pmem(void *addr, size_t size)
-{
-}
-static inline void arch_invalidate_pmem(void *addr, size_t size)
-{
-}
-#endif
-
/* this definition is in it's own header for tools/testing/nvdimm to consume */
struct pmem_device {
/* One contiguous memory region per device */
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 5954cfb..829d760 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -723,8 +723,9 @@ static ssize_t mappingN(struct device *dev, char *buf, int n)
nd_mapping = &nd_region->mapping[n];
nvdimm = nd_mapping->nvdimm;
- return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev),
- nd_mapping->start, nd_mapping->size);
+ return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev),
+ nd_mapping->start, nd_mapping->size,
+ nd_mapping->position);
}
#define REGION_MAPPING(idx) \
@@ -965,6 +966,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
nd_region->mapping[i].nvdimm = nvdimm;
nd_region->mapping[i].start = mapping->start;
nd_region->mapping[i].size = mapping->size;
+ nd_region->mapping[i].position = mapping->position;
INIT_LIST_HEAD(&nd_region->mapping[i].labels);
mutex_init(&nd_region->mapping[i].lock);
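
[Annotation] The mappingN sysfs format gains a trailing interleave-position field,
matching the "%s,%llu,%llu,%d" format string above. Hypothetical output for a
single mapping (values invented for illustration):

	# cat /sys/bus/nd/devices/region0/mapping0
	nmem0,0,17179869184,0

i.e. DIMM device name, start offset, size in bytes, and now the position of
this DIMM in the region's interleave set.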