diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-16 22:05:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-16 22:05:27 -0400 |
commit | a39ef1a7c6093bbd4e0a8197350b99cd635e5446 (patch) | |
tree | 5a19d1d1f289c52be72710f1d7cd954c830964f6 /drivers/block/nvme-core.c | |
parent | d82312c80860b8b83cd4473ac6eafd244e712061 (diff) | |
parent | 976f2ab4985a1dbe3f3f0777ce01d4fdb404c62a (diff) | |
download | op-kernel-dev-a39ef1a7c6093bbd4e0a8197350b99cd635e5446.zip op-kernel-dev-a39ef1a7c6093bbd4e0a8197350b99cd635e5446.tar.gz |
Merge branch 'for-4.1/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"This is the block driver pull request for 4.1. As with the core bits,
this is a relatively slow round. This pull request contains:
- Various fixes and cleanups for NVMe, from Alexey Khoroshilov, Chong
Yuan, myself, Keith Busch, and Murali Iyer.
- Documentation and code cleanups for nbd from Markus Pargmann.
- Change of brd maintainer to me, from Ross Zwisler. At least the
email doesn't bounce anymore then.
- Two xen-blkback fixes from Tao Chen"
* 'for-4.1/drivers' of git://git.kernel.dk/linux-block: (23 commits)
NVMe: Meta data handling through submit io ioctl
NVMe: Add translation for block limits
NVMe: Remove check for null
NVMe: Fix error handling of class_create("nvme")
xen-blkback: define pr_fmt macro to avoid the duplication of DRV_PFX
xen-blkback: enlarge the array size of blkback name
nbd: Return error pointer directly
nbd: Return error code directly
nbd: Remove fixme that was already fixed
nbd: Restructure debugging prints
nbd: Fix device bytesize type
nbd: Replace kthread_create with kthread_run
nbd: Remove kernel internal header
Documentation: nbd: Add list of module parameters
Documentation: nbd: Reformat to allow more documentation
NVMe: increase depth of admin queue
nvme: Fix PRP list calculation for non-4k system page size
NVMe: Fix blk-mq hot cpu notification
NVMe: embedded iod mask cleanup
NVMe: Freeze admin queue on device failure
...
Diffstat (limited to 'drivers/block/nvme-core.c')
-rw-r--r-- | drivers/block/nvme-core.c | 159 |
1 files changed, 66 insertions, 93 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e23be20..85b8036 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -44,7 +44,7 @@ #define NVME_MINORS (1U << MINORBITS) #define NVME_Q_DEPTH 1024 -#define NVME_AQ_DEPTH 64 +#define NVME_AQ_DEPTH 256 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define ADMIN_TIMEOUT (admin_timeout * HZ) @@ -152,6 +152,7 @@ struct nvme_cmd_info { */ #define NVME_INT_PAGES 2 #define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size) +#define NVME_INT_MASK 0x01 /* * Will slightly overestimate the number of pages needed. This is OK @@ -257,7 +258,7 @@ static void *iod_get_private(struct nvme_iod *iod) */ static bool iod_should_kfree(struct nvme_iod *iod) { - return (iod->private & 0x01) == 0; + return (iod->private & NVME_INT_MASK) == 0; } /* Special values must be less than 0x1000 */ @@ -301,8 +302,6 @@ static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn) static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { - struct request *req = ctx; - u32 result = le32_to_cpup(&cqe->result); u16 status = le16_to_cpup(&cqe->status) >> 1; @@ -311,8 +310,6 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, if (status == NVME_SC_SUCCESS) dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); - - blk_mq_free_hctx_request(nvmeq->hctx, req); } static void abort_completion(struct nvme_queue *nvmeq, void *ctx, @@ -432,7 +429,6 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, { unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) : sizeof(struct nvme_dsm_range); - unsigned long mask = 0; struct nvme_iod *iod; if (rq->nr_phys_segments <= NVME_INT_PAGES && @@ -440,9 +436,8 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq); iod = cmd->iod; - mask = 0x01; iod_init(iod, size, rq->nr_phys_segments, - (unsigned long) rq | 0x01); + (unsigned long) rq | NVME_INT_MASK); return iod; } @@ -522,8 +517,6 @@ static void nvme_dif_remap(struct request *req, return; pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; - if (!pmap) - return; p = pmap; virt = bip_get_seed(bip); @@ -645,12 +638,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, struct scatterlist *sg = iod->sg; int dma_len = sg_dma_len(sg); u64 dma_addr = sg_dma_address(sg); - int offset = offset_in_page(dma_addr); + u32 page_size = dev->page_size; + int offset = dma_addr & (page_size - 1); __le64 *prp_list; __le64 **list = iod_list(iod); dma_addr_t prp_dma; int nprps, i; - u32 page_size = dev->page_size; length -= (page_size - offset); if (length <= 0) @@ -1028,18 +1021,19 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) struct nvme_cmd_info *cmd_info; struct request *req; - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false); + req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true); if (IS_ERR(req)) return PTR_ERR(req); req->cmd_flags |= REQ_NO_TIMEOUT; cmd_info = blk_mq_rq_to_pdu(req); - nvme_set_info(cmd_info, req, async_req_completion); + nvme_set_info(cmd_info, NULL, async_req_completion); memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = req->tag; + blk_mq_free_hctx_request(nvmeq->hctx, req); return __nvme_submit_cmd(nvmeq, &c); } @@ -1347,6 +1341,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->cq_vector = -1; spin_unlock_irq(&nvmeq->q_lock); + if (!nvmeq->qid && nvmeq->dev->admin_q) + blk_mq_freeze_queue_start(nvmeq->dev->admin_q); + irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); @@ -1378,8 +1375,6 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } - if (!qid && dev->admin_q) - blk_mq_freeze_queue_start(dev->admin_q); spin_lock_irq(&nvmeq->q_lock); nvme_process_cq(nvmeq); @@ -1583,6 +1578,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.ops = &nvme_mq_admin_ops; dev->admin_tagset.nr_hw_queues = 1; dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; + dev->admin_tagset.reserved_tags = 1; dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); @@ -1749,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) struct nvme_dev *dev = ns->dev; struct nvme_user_io io; struct nvme_command c; - unsigned length, meta_len; - int status, i; - struct nvme_iod *iod, *meta_iod = NULL; - dma_addr_t meta_dma_addr; - void *meta, *uninitialized_var(meta_mem); + unsigned length, meta_len, prp_len; + int status, write; + struct nvme_iod *iod; + dma_addr_t meta_dma = 0; + void *meta = NULL; if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; length = (io.nblocks + 1) << ns->lba_shift; meta_len = (io.nblocks + 1) * ns->ms; - if (meta_len && ((io.metadata & 3) || !io.metadata)) + if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext) return -EINVAL; + else if (meta_len && ns->ext) { + length += meta_len; + meta_len = 0; + } + + write = io.opcode & 1; switch (io.opcode) { case nvme_cmd_write: case nvme_cmd_read: case nvme_cmd_compare: - iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length); + iod = nvme_map_user_pages(dev, write, io.addr, length); break; default: return -EINVAL; @@ -1776,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) if (IS_ERR(iod)) return PTR_ERR(iod); + prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL); + if (length != prp_len) { + status = -ENOMEM; + goto unmap; + } + if (meta_len) { + meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, + &meta_dma, GFP_KERNEL); + if (!meta) { + status = -ENOMEM; + goto unmap; + } + if (write) { + if (copy_from_user(meta, (void __user *)io.metadata, + meta_len)) { + status = -EFAULT; + goto unmap; + } + } + } + memset(&c, 0, sizeof(c)); c.rw.opcode = io.opcode; c.rw.flags = io.flags; @@ -1787,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.reftag = cpu_to_le32(io.reftag); c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - - if (meta_len) { - meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, - meta_len); - if (IS_ERR(meta_iod)) { - status = PTR_ERR(meta_iod); - meta_iod = NULL; - goto unmap; - } - - meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, - &meta_dma_addr, GFP_KERNEL); - if (!meta_mem) { - status = -ENOMEM; - goto unmap; - } - - if (io.opcode & 1) { - int meta_offset = 0; - - for (i = 0; i < meta_iod->nents; i++) { - meta = kmap_atomic(sg_page(&meta_iod->sg[i])) + - meta_iod->sg[i].offset; - memcpy(meta_mem + meta_offset, meta, - meta_iod->sg[i].length); - kunmap_atomic(meta); - meta_offset += meta_iod->sg[i].length; - } - } - - c.rw.metadata = cpu_to_le64(meta_dma_addr); - } - - length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); c.rw.prp2 = cpu_to_le64(iod->first_dma); - - if (length != (io.nblocks + 1) << ns->lba_shift) - status = -ENOMEM; - else - status = nvme_submit_io_cmd(dev, ns, &c, NULL); - - if (meta_len) { - if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) { - int meta_offset = 0; - - for (i = 0; i < meta_iod->nents; i++) { - meta = kmap_atomic(sg_page(&meta_iod->sg[i])) + - meta_iod->sg[i].offset; - memcpy(meta, meta_mem + meta_offset, - meta_iod->sg[i].length); - kunmap_atomic(meta); - meta_offset += meta_iod->sg[i].length; - } - } - - dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem, - meta_dma_addr); - } - + c.rw.metadata = cpu_to_le64(meta_dma); + status = nvme_submit_io_cmd(dev, ns, &c, NULL); unmap: - nvme_unmap_user_pages(dev, io.opcode & 1, iod); + nvme_unmap_user_pages(dev, write, iod); nvme_free_iod(dev, iod); - - if (meta_iod) { - nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod); - nvme_free_iod(dev, meta_iod); + if (meta) { + if (status == NVME_SC_SUCCESS && !write) { + if (copy_to_user((void __user *)io.metadata, meta, + meta_len)) + status = -EFAULT; + } + dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); } - return status; } @@ -2018,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_dev *dev = ns->dev; struct nvme_id_ns *id; dma_addr_t dma_addr; - int lbaf, pi_type, old_ms; + u8 lbaf, pi_type; + u16 old_ms; unsigned short bs; id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, @@ -2039,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; ns->lba_shift = id->lbaf[lbaf].ds; ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* * If identify namespace failed, use default 512 byte block size so @@ -2055,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (blk_get_integrity(disk) && (ns->pi_type != pi_type || ns->ms != old_ms || bs != queue_logical_block_size(disk->queue) || - (ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT))) + (ns->ms && ns->ext))) blk_integrity_unregister(disk); ns->pi_type = pi_type; blk_queue_logical_block_size(ns->queue, bs); if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) && - !(id->flbas & NVME_NS_FLBAS_META_EXT)) + !ns->ext) nvme_init_integrity(ns); if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) @@ -2334,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; dev->vwc = ctrl->vwc; - dev->event_limit = min(ctrl->aerl + 1, 8); memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); @@ -2881,6 +2851,7 @@ static int nvme_dev_start(struct nvme_dev *dev) nvme_set_irq_hints(dev); + dev->event_limit = 1; return result; free_tags: @@ -3166,8 +3137,10 @@ static int __init nvme_init(void) nvme_char_major = result; nvme_class = class_create(THIS_MODULE, "nvme"); - if (!nvme_class) + if (IS_ERR(nvme_class)) { + result = PTR_ERR(nvme_class); goto unregister_chrdev; + } result = pci_register_driver(&nvme_driver); if (result) |