diff options
author | Tadeusz Struk <tadeusz.struk@intel.com> | 2016-10-25 08:57:55 -0700 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-11-15 16:16:41 -0500 |
commit | acd7c8fe14938a315f0ac1b92a92375f7226c2fd (patch) | |
tree | a7c672b8310d578ed82ea0f9f353ee8826abcfd9 /drivers/infiniband/hw/hfi1 | |
parent | d9ac4555fb2bcd6b794aaa0b39acad81111d9f42 (diff) | |
download | op-kernel-dev-acd7c8fe14938a315f0ac1b92a92375f7226c2fd.zip op-kernel-dev-acd7c8fe14938a315f0ac1b92a92375f7226c2fd.tar.gz |
IB/hfi1: Fix an Oops on pci device force remove
This patch fixes an Oops on device unbind, when the device is used
by a PSM user process. PSM processes access device resources which
are freed on device removal. Similar protection exists in uverbs
in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence
a separate protection is required for PSM clients.
Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/file_ops.c | 19 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/hfi.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/init.c | 21 |
4 files changed, 44 insertions, 5 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9bf5f23..7992152 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; + init_completion(&dd->user_comp); + + /* The user refcount starts with one to inidicate an active device */ + atomic_set(&dd->user_refcount, 1); + goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 677efa0..bd786b7 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); + if (!atomic_inc_not_zero(&dd->user_refcount)) + return -ENXIO; + /* Just take a ref now. Not all opens result in a context assign */ kobject_get(&dd->kobj); @@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; atomic_inc(&fd->mm->mm_count); - } + fp->private_data = fd; + } else { + fp->private_data = NULL; + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); - fp->private_data = fd; + return -ENOMEM; + } - return fd ? 0 : -ENOMEM; + return 0; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) done: mmdrop(fdata->mm); kobject_put(&dd->kobj); + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3c06d204..368e96c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1174,6 +1174,10 @@ struct hfi1_devdata { spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; + /* Keeps track of user space clients */ + atomic_t user_refcount; + /* Used to wait for outstanding user space clients before dev removal */ + struct completion user_comp; struct hfi1_affinity *affinity; struct rhashtable sdma_rht; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 60db615..e28a6b6 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1538,12 +1538,31 @@ bail: return ret; } +static void wait_for_clients(struct hfi1_devdata *dd) +{ + /* + * Remove the device init value and complete the device if there is + * no clients or wait for active clients to finish. + */ + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + + wait_for_completion(&dd->user_comp); +} + static void remove_one(struct pci_dev *pdev) { struct hfi1_devdata *dd = pci_get_drvdata(pdev); /* close debugfs files before ib unregister */ hfi1_dbg_ibdev_exit(&dd->verbs_dev); + + /* remove the /dev hfi1 interface */ + hfi1_device_remove(dd); + + /* wait for existing user space clients to finish */ + wait_for_clients(dd); + /* unregister from IB core */ hfi1_unregister_ib_device(dd); @@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev) /* wait until all of our (qsfp) queue_work() calls complete */ flush_workqueue(ib_wq); - hfi1_device_remove(dd); - postinit_cleanup(dd); } |