summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--drivers/nvme/host/pci.c216
1 files changed, 111 insertions, 105 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4276ebf..6fe7af0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -75,7 +75,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct nvme_dev {
- struct nvme_queue **queues;
+ struct nvme_queue *queues;
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
@@ -365,7 +365,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
- struct nvme_queue *nvmeq = dev->queues[0];
+ struct nvme_queue *nvmeq = &dev->queues[0];
WARN_ON(hctx_idx != 0);
WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
@@ -387,7 +387,7 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
- struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+ struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
if (!nvmeq->tags)
nvmeq->tags = &dev->tagset.tags[hctx_idx];
@@ -403,7 +403,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
- struct nvme_queue *nvmeq = dev->queues[queue_idx];
+ struct nvme_queue *nvmeq = &dev->queues[queue_idx];
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
@@ -1044,7 +1044,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
- struct nvme_queue *nvmeq = dev->queues[0];
+ struct nvme_queue *nvmeq = &dev->queues[0];
struct nvme_command c;
memset(&c, 0, sizeof(c));
@@ -1138,9 +1138,14 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
*/
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ /* If there is a reset/reinit ongoing, we shouldn't reset again. */
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RECONNECTING:
return false;
+ default:
+ break;
+ }
/* We shouldn't reset unless the controller is on fatal error state
* _or_ if we lost the communication with it.
@@ -1280,7 +1285,6 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
if (nvmeq->sq_cmds)
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
- kfree(nvmeq);
}
static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1288,10 +1292,8 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
int i;
for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
dev->ctrl.queue_count--;
- dev->queues[i] = NULL;
- nvme_free_queue(nvmeq);
+ nvme_free_queue(&dev->queues[i]);
}
}
@@ -1323,12 +1325,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
- struct nvme_queue *nvmeq = dev->queues[0];
-
- if (!nvmeq)
- return;
- if (nvme_suspend_queue(nvmeq))
- return;
+ struct nvme_queue *nvmeq = &dev->queues[0];
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
@@ -1367,7 +1364,7 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+ if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
dev->ctrl.page_size);
nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
@@ -1382,13 +1379,13 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
return 0;
}
-static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
- int depth, int node)
+static int nvme_alloc_queue(struct nvme_dev *dev, int qid,
+ int depth, int node)
{
- struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
- node);
- if (!nvmeq)
- return NULL;
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+
+ if (dev->ctrl.queue_count > qid)
+ return 0;
nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
&nvmeq->cq_dma_addr, GFP_KERNEL);
@@ -1407,17 +1404,15 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_depth = depth;
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
- dev->queues[qid] = nvmeq;
dev->ctrl.queue_count++;
- return nvmeq;
+ return 0;
free_cqdma:
dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
nvmeq->cq_dma_addr);
free_nvmeq:
- kfree(nvmeq);
- return NULL;
+ return -ENOMEM;
}
static int queue_request_irq(struct nvme_queue *nvmeq)
@@ -1590,14 +1585,12 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
if (result < 0)
return result;
- nvmeq = dev->queues[0];
- if (!nvmeq) {
- nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
- dev_to_node(dev->dev));
- if (!nvmeq)
- return -ENOMEM;
- }
+ result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+ dev_to_node(dev->dev));
+ if (result)
+ return result;
+ nvmeq = &dev->queues[0];
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
@@ -1627,7 +1620,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
/* vector == qid - 1, match nvme_create_queue */
- if (!nvme_alloc_queue(dev, i, dev->q_depth,
+ if (nvme_alloc_queue(dev, i, dev->q_depth,
pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
ret = -ENOMEM;
break;
@@ -1636,15 +1629,15 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
max = min(dev->max_qid, dev->ctrl.queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
- ret = nvme_create_queue(dev->queues[i], i);
+ ret = nvme_create_queue(&dev->queues[i], i);
if (ret)
break;
}
/*
* Ignore failing Create SQ/CQ commands, we can continue with less
- * than the desired aount of queues, and even a controller without
- * I/O queues an still be used to issue admin commands. This might
+ * than the desired amount of queues, and even a controller without
+ * I/O queues can still be used to issue admin commands. This might
* be useful to upgrade a buggy firmware for example.
*/
return ret >= 0 ? 0 : ret;
@@ -1661,30 +1654,40 @@ static ssize_t nvme_cmb_show(struct device *dev,
}
static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
-static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
{
- u64 szu, size, offset;
+ u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
+
+ return 1ULL << (12 + 4 * szu);
+}
+
+static u32 nvme_cmb_size(struct nvme_dev *dev)
+{
+ return (dev->cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK;
+}
+
+static void nvme_map_cmb(struct nvme_dev *dev)
+{
+ u64 size, offset;
resource_size_t bar_size;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- void __iomem *cmb;
int bar;
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
- if (!(NVME_CMB_SZ(dev->cmbsz)))
- return NULL;
+ if (!dev->cmbsz)
+ return;
dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
if (!use_cmb_sqes)
- return NULL;
+ return;
- szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
- size = szu * NVME_CMB_SZ(dev->cmbsz);
- offset = szu * NVME_CMB_OFST(dev->cmbloc);
+ size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
+ offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
bar = NVME_CMB_BIR(dev->cmbloc);
bar_size = pci_resource_len(pdev, bar);
if (offset > bar_size)
- return NULL;
+ return;
/*
* Controllers may support a CMB size larger than their BAR,
@@ -1694,13 +1697,16 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
- if (!cmb)
- return NULL;
-
+ dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
+ if (!dev->cmb)
+ return;
dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
dev->cmb_size = size;
- return cmb;
+
+ if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
+ &dev_attr_cmb.attr, NULL))
+ dev_warn(dev->ctrl.device,
+ "failed to add sysfs attribute for CMB\n");
}
static inline void nvme_release_cmb(struct nvme_dev *dev)
@@ -1768,7 +1774,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
dma_addr_t descs_dma;
int i = 0;
void **bufs;
- u64 size = 0, tmp;
+ u64 size, tmp;
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
@@ -1851,7 +1857,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
u64 min = (u64)dev->ctrl.hmmin * 4096;
u32 enable_bits = NVME_HOST_MEM_ENABLE;
- int ret = 0;
+ int ret;
preferred = min(preferred, max);
if (min > max) {
@@ -1892,7 +1898,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
- struct nvme_queue *adminq = dev->queues[0];
+ struct nvme_queue *adminq = &dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
int result, nr_io_queues;
unsigned long size;
@@ -1905,7 +1911,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
- if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+ if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
sizeof(struct nvme_command));
if (result > 0)
@@ -2005,9 +2011,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
return 0;
}
-static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
+static void nvme_disable_io_queues(struct nvme_dev *dev)
{
- int pass;
+ int pass, queues = dev->online_queues - 1;
unsigned long timeout;
u8 opcode = nvme_admin_delete_sq;
@@ -2018,7 +2024,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
retry:
timeout = ADMIN_TIMEOUT;
for (; i > 0; i--, sent++)
- if (nvme_delete_queue(dev->queues[i], opcode))
+ if (nvme_delete_queue(&dev->queues[i], opcode))
break;
while (sent--) {
@@ -2033,13 +2039,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
}
/*
- * Return: error value if an error occurred setting up the queues or calling
- * Identify Device. 0 if these succeeded, even if adding some of the
- * namespaces failed. At the moment, these failures are silent. TBD which
- * failures should be reported.
+ * return error value only when tagset allocation failed
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
+ int ret;
+
if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
@@ -2055,8 +2060,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
dev->tagset.driver_data = dev;
- if (blk_mq_alloc_tag_set(&dev->tagset))
- return 0;
+ ret = blk_mq_alloc_tag_set(&dev->tagset);
+ if (ret) {
+ dev_warn(dev->ctrl.device,
+ "IO queues tagset allocation failed %d\n", ret);
+ return ret;
+ }
dev->ctrl.tagset = &dev->tagset;
nvme_dbbuf_set(dev);
@@ -2122,22 +2131,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
"set queue depth=%u\n", dev->q_depth);
}
- /*
- * CMBs can currently only exist on >=1.2 PCIe devices. We only
- * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group
- * has no name we can pass NULL as final argument to
- * sysfs_add_file_to_group.
- */
-
- if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
- dev->cmb = nvme_map_cmb(dev);
- if (dev->cmb) {
- if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL))
- dev_warn(dev->ctrl.device,
- "failed to add sysfs attribute for CMB\n");
- }
- }
+ nvme_map_cmb(dev);
pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
@@ -2170,7 +2164,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- int i, queues;
+ int i;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -2205,21 +2199,13 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
}
nvme_stop_queues(&dev->ctrl);
- queues = dev->online_queues - 1;
- for (i = dev->ctrl.queue_count - 1; i > 0; i--)
- nvme_suspend_queue(dev->queues[i]);
-
- if (dead) {
- /* A device might become IO incapable very soon during
- * probe, before the admin queue is configured. Thus,
- * queue_count can be 0 here.
- */
- if (dev->ctrl.queue_count)
- nvme_suspend_queue(dev->queues[0]);
- } else {
- nvme_disable_io_queues(dev, queues);
+ if (!dead) {
+ nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}
+ for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
+ nvme_suspend_queue(&dev->queues[i]);
+
nvme_pci_disable(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2289,6 +2275,7 @@ static void nvme_reset_work(struct work_struct *work)
container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
+ enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
goto out;
@@ -2300,6 +2287,16 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
+ /*
+ * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+ * initializing procedure here.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller RECONNECTING\n");
+ goto out;
+ }
+
result = nvme_pci_enable(dev);
if (result)
goto out;
@@ -2352,15 +2349,23 @@ static void nvme_reset_work(struct work_struct *work)
dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_kill_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
+ new_state = NVME_CTRL_ADMIN_ONLY;
} else {
nvme_start_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- nvme_dev_add(dev);
+ /* hit this only when allocate tagset fails */
+ if (nvme_dev_add(dev))
+ new_state = NVME_CTRL_ADMIN_ONLY;
nvme_unfreeze(&dev->ctrl);
}
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
- dev_warn(dev->ctrl.device, "failed to mark controller live\n");
+ /*
+ * If only admin queue live, keep it to do further investigation or
+ * recovery.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller state %d\n", new_state);
goto out;
}
@@ -2468,8 +2473,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
if (!dev)
return -ENOMEM;
- dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *),
- GFP_KERNEL, node);
+
+ dev->queues = kcalloc_node(num_possible_cpus() + 1,
+ sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues)
goto free;
@@ -2496,10 +2502,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto release_pools;
- nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- queue_work(nvme_wq, &dev->ctrl.reset_work);
+ nvme_reset_ctrl(&dev->ctrl);
+
return 0;
release_pools:
@@ -2523,7 +2529,7 @@ static void nvme_reset_prepare(struct pci_dev *pdev)
static void nvme_reset_done(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_reset_ctrl(&dev->ctrl);
+ nvme_reset_ctrl_sync(&dev->ctrl);
}
static void nvme_shutdown(struct pci_dev *pdev)
OpenPOWER on IntegriCloud