diff options
author | Mark Hairgrove <mhairgrove@nvidia.com> | 2018-02-09 19:20:06 -0800 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2018-03-14 20:04:43 +1100 |
commit | 720c84046c26444fe825f8614ddceb5c46539e67 (patch) | |
tree | 07203313bbcdc9271fb1630bb4581a1579085f75 | |
parent | c2be663d5307fb9751a562ac664fa78cd7a00e2b (diff) | |
download | op-kernel-dev-720c84046c26444fe825f8614ddceb5c46539e67.zip op-kernel-dev-720c84046c26444fe825f8614ddceb5c46539e67.tar.gz |
powerpc/npu-dma.c: Fix crash after __mmu_notifier_register failure
pnv_npu2_init_context wasn't checking the return code from
__mmu_notifier_register. If __mmu_notifier_register failed, the
npu_context was still assigned to the mm and the caller wasn't given any
indication that things went wrong. Later on pnv_npu2_destroy_context would
be called, which in turn called mmu_notifier_unregister and dropped
mm->mm_count without having incremented it in the first place. This led to
various forms of corruption like mm use-after-free and mm double-free.
__mmu_notifier_register can fail with EINTR if a signal is pending, so
this case can be frequent.
This patch calls opal_npu_destroy_context on the failure paths, and makes
sure not to assign mm->context.npu_context until past the failure points.
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Acked-By: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- | arch/powerpc/platforms/powernv/npu-dma.c | 32 |
1 files changed, 21 insertions, 11 deletions
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 77d6061..69a4f9e 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -724,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); + nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); + if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", + &nvlink_index))) + return ERR_PTR(-ENODEV); + if (!mm || mm->context.id == 0) { /* * Kernel thread contexts are not supported and context id 0 is @@ -751,25 +756,30 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, */ npu_context = mm->context.npu_context; if (!npu_context) { + rc = -ENOMEM; npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); - if (!npu_context) - return ERR_PTR(-ENOMEM); + if (npu_context) { + kref_init(&npu_context->kref); + npu_context->mm = mm; + npu_context->mn.ops = &nv_nmmu_notifier_ops; + rc = __mmu_notifier_register(&npu_context->mn, mm); + } + + if (rc) { + kfree(npu_context); + opal_npu_destroy_context(nphb->opal_id, mm->context.id, + PCI_DEVID(gpdev->bus->number, + gpdev->devfn)); + return ERR_PTR(rc); + } mm->context.npu_context = npu_context; - npu_context->mm = mm; - npu_context->mn.ops = &nv_nmmu_notifier_ops; - __mmu_notifier_register(&npu_context->mn, mm); - kref_init(&npu_context->kref); } else { - kref_get(&npu_context->kref); + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); } npu_context->release_cb = cb; npu_context->priv = priv; - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return ERR_PTR(-ENODEV); /* * npdev is a pci_dev pointer setup by the PCI code. We assign it to |