summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-11 15:36:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-11 15:36:52 -0700
commit130568d5eac5537cbd64cfb12103550af90edb79 (patch)
tree56d582ec11543bf5480822c3ef6c2b118bb70505 /drivers/nvme/host/pci.c
parent908b852df1d5d27d289e915fea7bfc16d38b8a76 (diff)
parentb222dd2fdd53a40dd8f1d3082ae98e52883cce0d (diff)
downloadop-kernel-dev-130568d5eac5537cbd64cfb12103550af90edb79.zip
op-kernel-dev-130568d5eac5537cbd64cfb12103550af90edb79.tar.gz
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe: "This is a followup for block changes, that didn't make the initial pull request. It's a bit of a mixed bag, this contains: - A followup pull request from Sagi for NVMe. Outside of fixups for NVMe, it also includes a series for ensuring that we properly quiesce hardware queues when browsing live tags. - Set of integrity fixes from Dmitry (mostly), fixing various issues for folks using DIF/DIX. - Fix for a bug introduced in cciss, with the req init changes. From Christoph. - Fix for a bug in BFQ, from Paolo. - Two followup fixes for lightnvm/pblk from Javier. - Depth fix from Ming for blk-mq-sched. - Also from Ming, performance fix for mtip32xx that was introduced with the dynamic initialization of commands" * 'for-linus' of git://git.kernel.dk/linux-block: (44 commits) block: call bio_uninit in bio_endio nvmet: avoid unneeded assignment of submit_bio return value nvme-pci: add module parameter for io queue depth nvme-pci: compile warnings in nvme_alloc_host_mem() nvmet_fc: Accept variable pad lengths on Create Association LS nvme_fc/nvmet_fc: revise Create Association descriptor length lightnvm: pblk: remove unnecessary checks lightnvm: pblk: control I/O flow also on tear down cciss: initialize struct scsi_req null_blk: fix error flow for shared tags during module_init block: Fix __blkdev_issue_zeroout loop nvme-rdma: unconditionally recycle the request mr nvme: split nvme_uninit_ctrl into stop and uninit virtio_blk: quiesce/unquiesce live IO when entering PM states mtip32xx: quiesce request queues to make sure no submissions are inflight nbd: quiesce request queues to make sure no submissions are inflight nvme: kick requeue list when requeueing a request instead of when starting the queues nvme-pci: quiesce/unquiesce admin_q instead of start/stop its hw queues nvme-loop: quiesce/unquiesce admin_q instead of start/stop its hw queues nvme-fc: quiesce/unquiesce admin_q instead of start/stop its hw queues ...
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--drivers/nvme/host/pci.c96
1 files changed, 58 insertions, 38 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b7a84c5..d10d2f2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -35,7 +35,6 @@
#include "nvme.h"
-#define NVME_Q_DEPTH 1024
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
MODULE_PARM_DESC(max_host_mem_size_mb,
"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+ .set = io_queue_depth_set,
+ .get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
struct nvme_dev;
struct nvme_queue;
@@ -74,7 +83,6 @@ struct nvme_dev {
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
- unsigned queue_count;
unsigned online_queues;
unsigned max_qid;
int q_depth;
@@ -105,6 +113,17 @@ struct nvme_dev {
void **host_mem_desc_bufs;
};
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 2)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
return qid * 2 * stride;
@@ -1099,9 +1118,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
int i;
- for (i = dev->queue_count - 1; i >= lowest; i--) {
+ for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
- dev->queue_count--;
+ dev->ctrl.queue_count--;
dev->queues[i] = NULL;
nvme_free_queue(nvmeq);
}
@@ -1126,7 +1145,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
spin_unlock_irq(&nvmeq->q_lock);
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
- blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+ blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
@@ -1145,8 +1164,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
- nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
- dev->bar + NVME_REG_CAP));
+ nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
@@ -1221,7 +1239,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
dev->queues[qid] = nvmeq;
- dev->queue_count++;
+ dev->ctrl.queue_count++;
return nvmeq;
@@ -1317,7 +1335,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
@@ -1354,7 +1372,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return -ENODEV;
}
} else
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
return 0;
}
@@ -1385,11 +1403,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
return 0;
}
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
{
int result;
u32 aqa;
- u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1397,13 +1414,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
- NVME_CAP_NSSRC(cap) : 0;
+ NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
if (dev->subsystem &&
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(&dev->ctrl, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result < 0)
return result;
@@ -1422,7 +1439,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(&dev->ctrl, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result)
return result;
@@ -1441,7 +1458,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
unsigned i, max;
int ret = 0;
- for (i = dev->queue_count; i <= dev->max_qid; i++) {
+ for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
/* vector == qid - 1, match nvme_create_queue */
if (!nvme_alloc_queue(dev, i, dev->q_depth,
pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1467,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
}
}
- max = min(dev->max_qid, dev->queue_count - 1);
+ max = min(dev->max_qid, dev->ctrl.queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
if (ret)
@@ -1585,9 +1602,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
struct nvme_host_mem_buf_desc *descs;
- u32 chunk_size, max_entries, i = 0;
+ u32 chunk_size, max_entries;
+ int i = 0;
void **bufs;
- u64 size, tmp;
+ u64 size = 0, tmp;
/* start big and work our way down */
chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
@@ -1866,7 +1884,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
static int nvme_pci_enable(struct nvme_dev *dev)
{
- u64 cap;
int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -1893,10 +1910,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
if (result < 0)
return result;
- cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+ dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
- dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
- dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+ dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ io_queue_depth);
+ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
dev->dbs = dev->bar + 4096;
/*
@@ -1908,6 +1926,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
"set queue depth=%u to work around controller resets\n",
dev->q_depth);
+ } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+ (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+ NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+ dev->q_depth = 64;
+ dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+ "set queue depth=%u\n", dev->q_depth);
}
/*
@@ -1996,7 +2020,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_stop_queues(&dev->ctrl);
queues = dev->online_queues - 1;
- for (i = dev->queue_count - 1; i > 0; i--)
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
if (dead) {
@@ -2004,7 +2028,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
*/
- if (dev->queue_count)
+ if (dev->ctrl.queue_count)
nvme_suspend_queue(dev->queues[0]);
} else {
nvme_disable_io_queues(dev, queues);
@@ -2094,7 +2118,7 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
- result = nvme_configure_admin_queue(dev);
+ result = nvme_pci_configure_admin_queue(dev);
if (result)
goto out;
@@ -2133,15 +2157,6 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
/*
- * A controller that can not execute IO typically requires user
- * intervention to correct. For such degraded controllers, the driver
- * should not submit commands the user did not request, so skip
- * registering for asynchronous event notification on this condition.
- */
- if (dev->online_queues > 1)
- nvme_queue_async_events(&dev->ctrl);
-
- /*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
*/
@@ -2161,8 +2176,7 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
- if (dev->online_queues > 1)
- nvme_queue_scan(&dev->ctrl);
+ nvme_start_ctrl(&dev->ctrl);
return;
out:
@@ -2341,11 +2355,13 @@ static void nvme_remove(struct pci_dev *pdev)
}
flush_work(&dev->ctrl.reset_work);
- nvme_uninit_ctrl(&dev->ctrl);
+ nvme_stop_ctrl(&dev->ctrl);
+ nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
+ nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl);
@@ -2458,6 +2474,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
OpenPOWER on IntegriCloud