summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c535
1 files changed, 534 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 62c3d9c..cd679cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
+#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -825,12 +826,155 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
return 0;
}
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_process_apertures_new_args *args = data;
+ struct kfd_process_device_apertures *pa;
+ struct kfd_process_device *pdd;
+ uint32_t nodes = 0;
+ int ret;
+
+ dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+ if (args->num_of_nodes == 0) {
+ /* Return number of nodes, so that user space can alloacate
+ * sufficient memory
+ */
+ mutex_lock(&p->mutex);
+
+ if (!kfd_has_process_device_data(p))
+ goto out_unlock;
+
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+ args->num_of_nodes++;
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd);
+
+ goto out_unlock;
+ }
+
+ /* Fill in process-aperture information for all available
+ * nodes, but not more than args->num_of_nodes as that is
+ * the amount of memory allocated by user
+ */
+ pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+ args->num_of_nodes), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ mutex_lock(&p->mutex);
+
+ if (!kfd_has_process_device_data(p)) {
+ args->num_of_nodes = 0;
+ kfree(pa);
+ goto out_unlock;
+ }
+
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+ pa[nodes].gpu_id = pdd->dev->id;
+ pa[nodes].lds_base = pdd->lds_base;
+ pa[nodes].lds_limit = pdd->lds_limit;
+ pa[nodes].gpuvm_base = pdd->gpuvm_base;
+ pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+ pa[nodes].scratch_base = pdd->scratch_base;
+ pa[nodes].scratch_limit = pdd->scratch_limit;
+
+ dev_dbg(kfd_device,
+ "gpu id %u\n", pdd->dev->id);
+ dev_dbg(kfd_device,
+ "lds_base %llX\n", pdd->lds_base);
+ dev_dbg(kfd_device,
+ "lds_limit %llX\n", pdd->lds_limit);
+ dev_dbg(kfd_device,
+ "gpuvm_base %llX\n", pdd->gpuvm_base);
+ dev_dbg(kfd_device,
+ "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+ dev_dbg(kfd_device,
+ "scratch_base %llX\n", pdd->scratch_base);
+ dev_dbg(kfd_device,
+ "scratch_limit %llX\n", pdd->scratch_limit);
+ nodes++;
+
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd && (nodes < args->num_of_nodes));
+ mutex_unlock(&p->mutex);
+
+ args->num_of_nodes = nodes;
+ ret = copy_to_user(
+ (void __user *)args->kfd_process_device_apertures_ptr,
+ pa,
+ (nodes * sizeof(struct kfd_process_device_apertures)));
+ kfree(pa);
+ return ret ? -EFAULT : 0;
+
+out_unlock:
+ mutex_unlock(&p->mutex);
+ return 0;
+}
+
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_event_args *args = data;
int err;
+ /* For dGPUs the event page is allocated in user mode. The
+ * handle is passed to KFD with the first call to this IOCTL
+ * through the event_page_offset field.
+ */
+ if (args->event_page_offset) {
+ struct kfd_dev *kfd;
+ struct kfd_process_device *pdd;
+ void *mem, *kern_addr;
+ uint64_t size;
+
+ if (p->signal_page) {
+ pr_err("Event page is already set\n");
+ return -EINVAL;
+ }
+
+ kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
+ if (!kfd) {
+ pr_err("Getting device by id failed in %s\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&p->mutex);
+ pdd = kfd_bind_process_to_device(kfd, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto out_unlock;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->event_page_offset));
+ if (!mem) {
+ pr_err("Can't find BO, offset is 0x%llx\n",
+ args->event_page_offset);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ mutex_unlock(&p->mutex);
+
+ err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
+ mem, &kern_addr, &size);
+ if (err) {
+ pr_err("Failed to map event page to kernel\n");
+ return err;
+ }
+
+ err = kfd_event_page_set(p, kern_addr, size);
+ if (err) {
+ pr_err("Failed to set event page\n");
+ return err;
+ }
+ }
+
err = kfd_event_create(filp, p, args->event_type,
args->auto_reset != 0, args->node_id,
&args->event_id, &args->event_trigger_data,
@@ -838,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
&args->event_slot_index);
return err;
+
+out_unlock:
+ mutex_unlock(&p->mutex);
+ return err;
}
static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
@@ -901,7 +1049,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
mutex_unlock(&p->mutex);
- if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
+ if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
+ pdd->qpd.vmid != 0)
dev->kfd2kgd->set_scratch_backing_va(
dev->kgd, args->va_addr, pdd->qpd.vmid);
@@ -954,6 +1103,371 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
return 0;
}
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+ void *data)
+{
+ struct kfd_ioctl_acquire_vm_args *args = data;
+ struct kfd_process_device *pdd;
+ struct kfd_dev *dev;
+ struct file *drm_file;
+ int ret;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev)
+ return -EINVAL;
+
+ drm_file = fget(args->drm_fd);
+ if (!drm_file)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (pdd->drm_file) {
+ ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+ goto err_unlock;
+ }
+
+ ret = kfd_process_device_init_vm(pdd, drm_file);
+ if (ret)
+ goto err_unlock;
+ /* On success, the PDD keeps the drm_file reference */
+ mutex_unlock(&p->mutex);
+
+ return 0;
+
+err_unlock:
+ mutex_unlock(&p->mutex);
+ fput(drm_file);
+ return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+ struct kfd_local_mem_info mem_info;
+
+ if (debug_largebar) {
+ pr_debug("Simulate large-bar allocation on non large-bar machine\n");
+ return true;
+ }
+
+ if (dev->device_info->needs_iommu_device)
+ return false;
+
+ dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+ if (mem_info.local_mem_size_private == 0 &&
+ mem_info.local_mem_size_public > 0)
+ return true;
+ return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+ struct kfd_process_device *pdd;
+ void *mem;
+ struct kfd_dev *dev;
+ int idr_handle;
+ long err;
+ uint64_t offset = args->mmap_offset;
+ uint32_t flags = args->flags;
+
+ if (args->size == 0)
+ return -EINVAL;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev)
+ return -EINVAL;
+
+ if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+ (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+ !kfd_dev_is_large_bar(dev)) {
+ pr_err("Alloc host visible vram on small bar is not allowed\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto err_unlock;
+ }
+
+ err = dev->kfd2kgd->alloc_memory_of_gpu(
+ dev->kgd, args->va_addr, args->size,
+ pdd->vm, (struct kgd_mem **) &mem, &offset,
+ flags);
+
+ if (err)
+ goto err_unlock;
+
+ idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+ if (idr_handle < 0) {
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ mutex_unlock(&p->mutex);
+
+ args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+ args->mmap_offset = offset;
+
+ return 0;
+
+err_free:
+ dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+ mutex_unlock(&p->mutex);
+ return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+ struct kfd_process_device *pdd;
+ void *mem;
+ struct kfd_dev *dev;
+ int ret;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ mem = kfd_process_device_translate_handle(
+ pdd, GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+ /* If freeing the buffer failed, leave the handle in place for
+ * clean-up during process tear-down.
+ */
+ if (!ret)
+ kfd_process_device_remove_obj_handle(
+ pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+ struct kfd_process_device *pdd, *peer_pdd;
+ void *mem;
+ struct kfd_dev *dev, *peer;
+ long err = 0;
+ int i;
+ uint32_t *devices_arr = NULL;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ if (!args->n_devices) {
+ pr_debug("Device IDs array empty\n");
+ return -EINVAL;
+ }
+ if (args->n_success > args->n_devices) {
+ pr_debug("n_success exceeds n_devices\n");
+ return -EINVAL;
+ }
+
+ devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+ GFP_KERNEL);
+ if (!devices_arr)
+ return -ENOMEM;
+
+ err = copy_from_user(devices_arr,
+ (void __user *)args->device_ids_array_ptr,
+ args->n_devices * sizeof(*devices_arr));
+ if (err != 0) {
+ err = -EFAULT;
+ goto copy_from_user_failed;
+ }
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto bind_process_to_device_failed;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ err = -ENOMEM;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ for (i = args->n_success; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (!peer) {
+ pr_debug("Getting device by id failed for 0x%x\n",
+ devices_arr[i]);
+ err = -EINVAL;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ peer_pdd = kfd_bind_process_to_device(peer, p);
+ if (IS_ERR(peer_pdd)) {
+ err = PTR_ERR(peer_pdd);
+ goto get_mem_obj_from_handle_failed;
+ }
+ err = peer->kfd2kgd->map_memory_to_gpu(
+ peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+ if (err) {
+ pr_err("Failed to map to gpu %d/%d\n",
+ i, args->n_devices);
+ goto map_memory_to_gpu_failed;
+ }
+ args->n_success = i+1;
+ }
+
+ mutex_unlock(&p->mutex);
+
+ err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ if (err) {
+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ goto sync_memory_failed;
+ }
+
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd);
+ }
+
+ kfree(devices_arr);
+
+ return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+ mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+ kfree(devices_arr);
+
+ return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+ struct kfd_process_device *pdd, *peer_pdd;
+ void *mem;
+ struct kfd_dev *dev, *peer;
+ long err = 0;
+ uint32_t *devices_arr = NULL, i;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ if (!args->n_devices) {
+ pr_debug("Device IDs array empty\n");
+ return -EINVAL;
+ }
+ if (args->n_success > args->n_devices) {
+ pr_debug("n_success exceeds n_devices\n");
+ return -EINVAL;
+ }
+
+ devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+ GFP_KERNEL);
+ if (!devices_arr)
+ return -ENOMEM;
+
+ err = copy_from_user(devices_arr,
+ (void __user *)args->device_ids_array_ptr,
+ args->n_devices * sizeof(*devices_arr));
+ if (err != 0) {
+ err = -EFAULT;
+ goto copy_from_user_failed;
+ }
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ err = PTR_ERR(pdd);
+ goto bind_process_to_device_failed;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ err = -ENOMEM;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ for (i = args->n_success; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (!peer) {
+ err = -EINVAL;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (!peer_pdd) {
+ err = -ENODEV;
+ goto get_mem_obj_from_handle_failed;
+ }
+ err = dev->kfd2kgd->unmap_memory_to_gpu(
+ peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+ if (err) {
+ pr_err("Failed to unmap from gpu %d/%d\n",
+ i, args->n_devices);
+ goto unmap_memory_from_gpu_failed;
+ }
+ args->n_success = i+1;
+ }
+ kfree(devices_arr);
+
+ mutex_unlock(&p->mutex);
+
+ return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+ mutex_unlock(&p->mutex);
+copy_from_user_failed:
+ kfree(devices_arr);
+ return err;
+}
+
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1016,6 +1530,25 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
kfd_ioctl_set_trap_handler, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+ kfd_ioctl_get_process_apertures_new, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+ kfd_ioctl_acquire_vm, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+ kfd_ioctl_alloc_memory_of_gpu, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+ kfd_ioctl_free_memory_of_gpu, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+ kfd_ioctl_map_memory_to_gpu, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+ kfd_ioctl_unmap_memory_from_gpu, 0),
+
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
OpenPOWER on IntegriCloud