summaryrefslogtreecommitdiffstats
path: root/drivers/block/nvme-core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-16 22:05:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-16 22:05:27 -0400
commita39ef1a7c6093bbd4e0a8197350b99cd635e5446 (patch)
tree5a19d1d1f289c52be72710f1d7cd954c830964f6 /drivers/block/nvme-core.c
parentd82312c80860b8b83cd4473ac6eafd244e712061 (diff)
parent976f2ab4985a1dbe3f3f0777ce01d4fdb404c62a (diff)
downloadop-kernel-dev-a39ef1a7c6093bbd4e0a8197350b99cd635e5446.zip
op-kernel-dev-a39ef1a7c6093bbd4e0a8197350b99cd635e5446.tar.gz
Merge branch 'for-4.1/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: "This is the block driver pull request for 4.1. As with the core bits, this is a relatively slow round. This pull request contains: - Various fixes and cleanups for NVMe, from Alexey Khoroshilov, Chong Yuan, myself, Keith Busch, and Murali Iyer. - Documentation and code cleanups for nbd from Markus Pargmann. - Change of brd maintainer to me, from Ross Zwisler. At least the email doesn't bounce anymore then. - Two xen-blkback fixes from Tao Chen" * 'for-4.1/drivers' of git://git.kernel.dk/linux-block: (23 commits) NVMe: Meta data handling through submit io ioctl NVMe: Add translation for block limits NVMe: Remove check for null NVMe: Fix error handling of class_create("nvme") xen-blkback: define pr_fmt macro to avoid the duplication of DRV_PFX xen-blkback: enlarge the array size of blkback name nbd: Return error pointer directly nbd: Return error code directly nbd: Remove fixme that was already fixed nbd: Restructure debugging prints nbd: Fix device bytesize type nbd: Replace kthread_create with kthread_run nbd: Remove kernel internal header Documentation: nbd: Add list of module parameters Documentation: nbd: Reformat to allow more documentation NVMe: increase depth of admin queue nvme: Fix PRP list calculation for non-4k system page size NVMe: Fix blk-mq hot cpu notification NVMe: embedded iod mask cleanup NVMe: Freeze admin queue on device failure ...
Diffstat (limited to 'drivers/block/nvme-core.c')
-rw-r--r--drivers/block/nvme-core.c159
1 files changed, 66 insertions, 93 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e23be20..85b8036 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -44,7 +44,7 @@
#define NVME_MINORS (1U << MINORBITS)
#define NVME_Q_DEPTH 1024
-#define NVME_AQ_DEPTH 64
+#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
#define ADMIN_TIMEOUT (admin_timeout * HZ)
@@ -152,6 +152,7 @@ struct nvme_cmd_info {
*/
#define NVME_INT_PAGES 2
#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size)
+#define NVME_INT_MASK 0x01
/*
* Will slightly overestimate the number of pages needed. This is OK
@@ -257,7 +258,7 @@ static void *iod_get_private(struct nvme_iod *iod)
*/
static bool iod_should_kfree(struct nvme_iod *iod)
{
- return (iod->private & 0x01) == 0;
+ return (iod->private & NVME_INT_MASK) == 0;
}
/* Special values must be less than 0x1000 */
@@ -301,8 +302,6 @@ static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
struct nvme_completion *cqe)
{
- struct request *req = ctx;
-
u32 result = le32_to_cpup(&cqe->result);
u16 status = le16_to_cpup(&cqe->status) >> 1;
@@ -311,8 +310,6 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
if (status == NVME_SC_SUCCESS)
dev_warn(nvmeq->q_dmadev,
"async event result %08x\n", result);
-
- blk_mq_free_hctx_request(nvmeq->hctx, req);
}
static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -432,7 +429,6 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
{
unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
sizeof(struct nvme_dsm_range);
- unsigned long mask = 0;
struct nvme_iod *iod;
if (rq->nr_phys_segments <= NVME_INT_PAGES &&
@@ -440,9 +436,8 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);
iod = cmd->iod;
- mask = 0x01;
iod_init(iod, size, rq->nr_phys_segments,
- (unsigned long) rq | 0x01);
+ (unsigned long) rq | NVME_INT_MASK);
return iod;
}
@@ -522,8 +517,6 @@ static void nvme_dif_remap(struct request *req,
return;
pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
- if (!pmap)
- return;
p = pmap;
virt = bip_get_seed(bip);
@@ -645,12 +638,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
struct scatterlist *sg = iod->sg;
int dma_len = sg_dma_len(sg);
u64 dma_addr = sg_dma_address(sg);
- int offset = offset_in_page(dma_addr);
+ u32 page_size = dev->page_size;
+ int offset = dma_addr & (page_size - 1);
__le64 *prp_list;
__le64 **list = iod_list(iod);
dma_addr_t prp_dma;
int nprps, i;
- u32 page_size = dev->page_size;
length -= (page_size - offset);
if (length <= 0)
@@ -1028,18 +1021,19 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
struct nvme_cmd_info *cmd_info;
struct request *req;
- req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false);
+ req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
if (IS_ERR(req))
return PTR_ERR(req);
req->cmd_flags |= REQ_NO_TIMEOUT;
cmd_info = blk_mq_rq_to_pdu(req);
- nvme_set_info(cmd_info, req, async_req_completion);
+ nvme_set_info(cmd_info, NULL, async_req_completion);
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_async_event;
c.common.command_id = req->tag;
+ blk_mq_free_hctx_request(nvmeq->hctx, req);
return __nvme_submit_cmd(nvmeq, &c);
}
@@ -1347,6 +1341,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
nvmeq->cq_vector = -1;
spin_unlock_irq(&nvmeq->q_lock);
+ if (!nvmeq->qid && nvmeq->dev->admin_q)
+ blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+
irq_set_affinity_hint(vector, NULL);
free_irq(vector, nvmeq);
@@ -1378,8 +1375,6 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
adapter_delete_sq(dev, qid);
adapter_delete_cq(dev, qid);
}
- if (!qid && dev->admin_q)
- blk_mq_freeze_queue_start(dev->admin_q);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
@@ -1583,6 +1578,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.ops = &nvme_mq_admin_ops;
dev->admin_tagset.nr_hw_queues = 1;
dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
+ dev->admin_tagset.reserved_tags = 1;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -1749,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
struct nvme_dev *dev = ns->dev;
struct nvme_user_io io;
struct nvme_command c;
- unsigned length, meta_len;
- int status, i;
- struct nvme_iod *iod, *meta_iod = NULL;
- dma_addr_t meta_dma_addr;
- void *meta, *uninitialized_var(meta_mem);
+ unsigned length, meta_len, prp_len;
+ int status, write;
+ struct nvme_iod *iod;
+ dma_addr_t meta_dma = 0;
+ void *meta = NULL;
if (copy_from_user(&io, uio, sizeof(io)))
return -EFAULT;
length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms;
- if (meta_len && ((io.metadata & 3) || !io.metadata))
+ if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
return -EINVAL;
+ else if (meta_len && ns->ext) {
+ length += meta_len;
+ meta_len = 0;
+ }
+
+ write = io.opcode & 1;
switch (io.opcode) {
case nvme_cmd_write:
case nvme_cmd_read:
case nvme_cmd_compare:
- iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
+ iod = nvme_map_user_pages(dev, write, io.addr, length);
break;
default:
return -EINVAL;
@@ -1776,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
if (IS_ERR(iod))
return PTR_ERR(iod);
+ prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+ if (length != prp_len) {
+ status = -ENOMEM;
+ goto unmap;
+ }
+ if (meta_len) {
+ meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+ &meta_dma, GFP_KERNEL);
+ if (!meta) {
+ status = -ENOMEM;
+ goto unmap;
+ }
+ if (write) {
+ if (copy_from_user(meta, (void __user *)io.metadata,
+ meta_len)) {
+ status = -EFAULT;
+ goto unmap;
+ }
+ }
+ }
+
memset(&c, 0, sizeof(c));
c.rw.opcode = io.opcode;
c.rw.flags = io.flags;
@@ -1787,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.reftag = cpu_to_le32(io.reftag);
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
-
- if (meta_len) {
- meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
- meta_len);
- if (IS_ERR(meta_iod)) {
- status = PTR_ERR(meta_iod);
- meta_iod = NULL;
- goto unmap;
- }
-
- meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
- &meta_dma_addr, GFP_KERNEL);
- if (!meta_mem) {
- status = -ENOMEM;
- goto unmap;
- }
-
- if (io.opcode & 1) {
- int meta_offset = 0;
-
- for (i = 0; i < meta_iod->nents; i++) {
- meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
- meta_iod->sg[i].offset;
- memcpy(meta_mem + meta_offset, meta,
- meta_iod->sg[i].length);
- kunmap_atomic(meta);
- meta_offset += meta_iod->sg[i].length;
- }
- }
-
- c.rw.metadata = cpu_to_le64(meta_dma_addr);
- }
-
- length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
c.rw.prp2 = cpu_to_le64(iod->first_dma);
-
- if (length != (io.nblocks + 1) << ns->lba_shift)
- status = -ENOMEM;
- else
- status = nvme_submit_io_cmd(dev, ns, &c, NULL);
-
- if (meta_len) {
- if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
- int meta_offset = 0;
-
- for (i = 0; i < meta_iod->nents; i++) {
- meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
- meta_iod->sg[i].offset;
- memcpy(meta, meta_mem + meta_offset,
- meta_iod->sg[i].length);
- kunmap_atomic(meta);
- meta_offset += meta_iod->sg[i].length;
- }
- }
-
- dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
- meta_dma_addr);
- }
-
+ c.rw.metadata = cpu_to_le64(meta_dma);
+ status = nvme_submit_io_cmd(dev, ns, &c, NULL);
unmap:
- nvme_unmap_user_pages(dev, io.opcode & 1, iod);
+ nvme_unmap_user_pages(dev, write, iod);
nvme_free_iod(dev, iod);
-
- if (meta_iod) {
- nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
- nvme_free_iod(dev, meta_iod);
+ if (meta) {
+ if (status == NVME_SC_SUCCESS && !write) {
+ if (copy_to_user((void __user *)io.metadata, meta,
+ meta_len))
+ status = -EFAULT;
+ }
+ dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
}
-
return status;
}
@@ -2018,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id;
dma_addr_t dma_addr;
- int lbaf, pi_type, old_ms;
+ u8 lbaf, pi_type;
+ u16 old_ms;
unsigned short bs;
id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
@@ -2039,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/*
* If identify namespace failed, use default 512 byte block size so
@@ -2055,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
- (ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+ (ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type;
blk_queue_logical_block_size(ns->queue, bs);
if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
- !(id->flbas & NVME_NS_FLBAS_META_EXT))
+ !ns->ext)
nvme_init_integrity(ns);
if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
@@ -2334,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->oncs = le16_to_cpup(&ctrl->oncs);
dev->abort_limit = ctrl->acl + 1;
dev->vwc = ctrl->vwc;
- dev->event_limit = min(ctrl->aerl + 1, 8);
memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2881,6 +2851,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
nvme_set_irq_hints(dev);
+ dev->event_limit = 1;
return result;
free_tags:
@@ -3166,8 +3137,10 @@ static int __init nvme_init(void)
nvme_char_major = result;
nvme_class = class_create(THIS_MODULE, "nvme");
- if (!nvme_class)
+ if (IS_ERR(nvme_class)) {
+ result = PTR_ERR(nvme_class);
goto unregister_chrdev;
+ }
result = pci_register_driver(&nvme_driver);
if (result)
OpenPOWER on IntegriCloud