summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_events.c
diff options
context:
space:
mode:
authorAlexey Skidanov <alexey.skidanov@gmail.com>2015-04-14 18:05:49 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2015-05-19 13:02:27 +0300
commit59d3e8be87a14c6a0d91c683e63d9b31734525ae (patch)
tree1f23bcca62cdfcec10055b83463d51129ec0d590 /drivers/gpu/drm/amd/amdkfd/kfd_events.c
parentf3a398183f7b9ef78f6b71ee9f7641e046403bcb (diff)
downloadop-kernel-dev-59d3e8be87a14c6a0d91c683e63d9b31734525ae.zip
op-kernel-dev-59d3e8be87a14c6a0d91c683e63d9b31734525ae.tar.gz
drm/amdkfd: Add memory exception handling
This patch adds Peripheral Page Request (PPR) failure processing and reporting. A bad address, or a pointer to a system memory block with inappropriate read/write permissions, causes such a PPR failure during user queue processing. PPR request handling is done by the IOMMU driver, which notifies the AMDKFD module on PPR failure. The process that triggered the PPR failure is notified through the appropriate event or, if it has no such event, a SIGTERM signal is sent to it. v3: - Change all bool fields in struct kfd_memory_exception_failure to uint32_t Signed-off-by: Alexey Skidanov <alexey.skidanov@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_events.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c155
1 files changed, 147 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 23ffa96..fa13d3e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -30,6 +30,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
+#include <linux/device.h>
/*
* A task can only be on a single wait_queue at a time, but we need to support
@@ -45,6 +46,10 @@ struct kfd_event_waiter {
/* Transitions to true when the event this belongs to is signaled. */
bool activated;
+
+ /* Event */
+ struct kfd_event *event;
+ uint32_t input_index;
};
/*
@@ -609,14 +614,17 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
}
static int init_event_waiter(struct kfd_process *p,
- struct kfd_event_waiter *waiter,
- uint32_t event_id)
+ struct kfd_event_waiter *waiter,
+ uint32_t event_id,
+ uint32_t input_index)
{
struct kfd_event *ev = lookup_event_by_id(p, event_id);
if (!ev)
return -EINVAL;
+ waiter->event = ev;
+ waiter->input_index = input_index;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
@@ -643,6 +651,38 @@ static bool test_event_condition(bool all, uint32_t num_events,
return activated_count == num_events;
}
+/*
+ * Copy event specific data, if defined: for each activated waiter whose
+ * event is of type KFD_EVENT_TYPE_MEMORY, the event's memory exception
+ * record is copied to the user array slot given by waiter->input_index.
+ * Currently only memory exception events have additional data to copy.
+ *
+ * Returns true if all copies succeeded, false on the first failed one.
+ */
+static bool copy_signaled_event_data(uint32_t num_events,
+		struct kfd_event_waiter *event_waiters,
+		struct kfd_event_data __user *data)
+{
+	uint32_t idx;
+
+	for (idx = 0; idx < num_events; idx++) {
+		struct kfd_event_waiter *w = &event_waiters[idx];
+		struct kfd_event *ev = w->event;
+		struct kfd_hsa_memory_exception_data __user *to;
+
+		/* Nothing to hand back for this waiter. */
+		if (!w->activated || ev->type != KFD_EVENT_TYPE_MEMORY)
+			continue;
+
+		to = &data[w->input_index].memory_exception_data;
+		if (copy_to_user(to, &ev->memory_exception_data,
+				sizeof(struct kfd_hsa_memory_exception_data)))
+			return false;
+	}
+
+	return true;
+}
+
+
+
static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
@@ -672,10 +712,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
}
int kfd_wait_on_events(struct kfd_process *p,
- uint32_t num_events, const uint32_t __user *event_ids,
+ uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result)
{
+ struct kfd_event_data __user *events =
+ (struct kfd_event_data __user *) data;
uint32_t i;
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
@@ -690,13 +732,14 @@ int kfd_wait_on_events(struct kfd_process *p,
}
for (i = 0; i < num_events; i++) {
- uint32_t event_id;
+ struct kfd_event_data event_data;
- ret = get_user(event_id, &event_ids[i]);
- if (ret)
+ if (copy_from_user(&event_data, &events[i],
+ sizeof(struct kfd_event_data)))
goto fail;
- ret = init_event_waiter(p, &event_waiters[i], event_id);
+ ret = init_event_waiter(p, &event_waiters[i],
+ event_data.event_id, i);
if (ret)
goto fail;
}
@@ -723,7 +766,11 @@ int kfd_wait_on_events(struct kfd_process *p,
}
if (test_event_condition(all, num_events, event_waiters)) {
- *wait_result = KFD_WAIT_COMPLETE;
+ if (copy_signaled_event_data(num_events,
+ event_waiters, events))
+ *wait_result = KFD_WAIT_COMPLETE;
+ else
+ *wait_result = KFD_WAIT_ERROR;
break;
}
@@ -797,3 +844,95 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
return remap_pfn_range(vma, vma->vm_start, pfn,
vma->vm_end - vma->vm_start, vma->vm_page_prot);
}
+
+/*
+ * Signal every event of the given type that belongs to process p. If the
+ * process has no event of that type, send SIGTERM to its lead thread.
+ *
+ * event_data is used only for KFD_EVENT_TYPE_MEMORY events, where it is
+ * read as a struct kfd_hsa_memory_exception_data; it may be NULL, in
+ * which case the event's stored record is left untouched.
+ *
+ * Assumes that p->event_mutex is held and of course
+ * that p is not going away (current or locked).
+ */
+static void lookup_events_by_type_and_signal(struct kfd_process *p,
+		int type, void *event_data)
+{
+	struct kfd_hsa_memory_exception_data *ev_data = event_data;
+	struct kfd_event *ev;
+	bool send_signal = true;
+	int bkt;
+
+	hash_for_each(p->events, bkt, ev, events) {
+		if (ev->type != type)
+			continue;
+
+		send_signal = false;
+		dev_dbg(kfd_device,
+			"Event found: id %X type %d",
+			ev->event_id, ev->type);
+		/*
+		 * Fill in the payload before signaling, so that whoever
+		 * reacts to set_event() does not read a stale record.
+		 */
+		if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
+			ev->memory_exception_data = *ev_data;
+		set_event(ev);
+	}
+
+	/* Send SIGTERM if no event of type "type" has been found */
+	if (send_signal) {
+		dev_warn(kfd_device,
+			"Sending SIGTERM to HSA Process with PID %d ",
+			p->lead_thread->pid);
+		send_sig(SIGTERM, p->lead_thread, 0);
+	}
+}
+
+/*
+ * Build a memory exception record for a faulting access at "address" by
+ * the process bound to "pasid", then signal that process's memory events
+ * (lookup_events_by_type_and_signal() falls back to SIGTERM if it has
+ * none). Does nothing when no process is found for the pasid.
+ */
+void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
+		unsigned long address, bool is_write_requested,
+		bool is_execute_requested)
+{
+	struct kfd_hsa_memory_exception_data exception;
+	struct vm_area_struct *vma;
+	struct kfd_process *p;
+
+	/*
+	 * Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function returns a locked process.
+	 */
+	p = kfd_lookup_process_by_pasid(pasid);
+	if (!p)
+		return; /* Presumably process exited. */
+
+	/* Default failure reason: NotPresent set, the other flags clear. */
+	memset(&exception, 0, sizeof(exception));
+	exception.gpu_id = dev->id;
+	exception.va = address;
+	exception.failure.NotPresent = 1;
+
+	down_read(&p->mm->mmap_sem);
+	vma = find_vma(p->mm, address);
+	if (vma && vma->vm_start <= address) {
+		/* The returned vma starts at or below the address. */
+		exception.failure.NotPresent = 0;
+		exception.failure.ReadOnly = is_write_requested &&
+				!(vma->vm_flags & VM_WRITE);
+		exception.failure.NoExecute = is_execute_requested &&
+				!(vma->vm_flags & VM_EXEC);
+	}
+	up_read(&p->mm->mmap_sem);
+
+	/* Lookup events by type and signal them */
+	mutex_lock(&p->event_mutex);
+	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
+			&exception);
+	mutex_unlock(&p->event_mutex);
+
+	/* Release the process that the lookup returned locked. */
+	mutex_unlock(&p->mutex);
+}
OpenPOWER on IntegriCloud